Skip to content

Commit

Permalink
tweaks; restore old tuning tab for now
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrik-johansson committed Jan 10, 2025
1 parent ecd272c commit 1209dc2
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 35 deletions.
4 changes: 4 additions & 0 deletions src/mpn_extras.h
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,8 @@ FLINT_DLL extern const flint_mpn_sqrhigh_normalised_func_t flint_mpn_sqrhigh_nor
#if FLINT_HAVE_ASSEMBLY_x86_64_adx
# define FLINT_MPN_MULLOW_FUNC_TAB_WIDTH 8
# define FLINT_MPN_MULHIGH_FUNC_TAB_WIDTH 13
/* The n giving the best effective cycles/limb (currently also the largest assembly case); used by mulhigh_recursive */
# define FLINT_MPN_MULHIGH_BEST_TAB_N 9
# define FLINT_MPN_SQRHIGH_FUNC_TAB_WIDTH 8
# define FLINT_MPN_MULHIGH_NORMALISED_FUNC_TAB_WIDTH 9
# define FLINT_MPN_SQRHIGH_NORMALISED_FUNC_TAB_WIDTH 8
Expand All @@ -632,6 +634,7 @@ FLINT_DLL extern const flint_mpn_sqrhigh_normalised_func_t flint_mpn_sqrhigh_nor
#elif FLINT_HAVE_ASSEMBLY_armv8
# define FLINT_MPN_MULLOW_FUNC_TAB_WIDTH 0
# define FLINT_MPN_MULHIGH_FUNC_TAB_WIDTH 8
# define FLINT_MPN_MULHIGH_BEST_TAB_N 8
# define FLINT_MPN_SQRHIGH_FUNC_TAB_WIDTH 8
# define FLINT_MPN_MULHIGH_NORMALISED_FUNC_TAB_WIDTH 0
# define FLINT_MPN_SQRHIGH_NORMALISED_FUNC_TAB_WIDTH 0
Expand All @@ -643,6 +646,7 @@ FLINT_DLL extern const flint_mpn_sqrhigh_normalised_func_t flint_mpn_sqrhigh_nor
/* TODO: generic hardcoded mullows */
# define FLINT_MPN_MULLOW_FUNC_TAB_WIDTH 0
# define FLINT_MPN_MULHIGH_FUNC_TAB_WIDTH 16
# define FLINT_MPN_MULHIGH_BEST_TAB_N 16
# define FLINT_MPN_SQRHIGH_FUNC_TAB_WIDTH 2
# define FLINT_MPN_MULHIGH_NORMALISED_FUNC_TAB_WIDTH 0
# define FLINT_MPN_SQRHIGH_NORMALISED_FUNC_TAB_WIDTH 0
Expand Down
55 changes: 28 additions & 27 deletions src/mpn_extras/mulhigh.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,35 @@
#include "mpn_extras.h"

/* Generated by tune-mulhigh.c */

const signed short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE] =
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 12, 12, 13, 13, 14, 16, 16, 17, 18, 19,
18, 20, 20, 20, 24, 20, 24, 20, 21, 22, 22, 24, 24, 26, 26, 24, 25, 30, 30, 31, 32, 30, 32, 32, 36, 35, 36, 32, 40, 38,
39, 40, 40, 37, 39, 40, 40, 40, 40, 40, 48, 48, 44, 48, 52, 52, 40, 40, 44, 52, 44, 44, 44, 52, 64, 44, 48, 48, 48, 64,
64, 52, 56, 64, 52, 60, 56, 56, 56, 56, 64, 64, 64, 60, 64, 64, 68, 64, 60, 64, 64, 64, 64, 64, 72, 68, 64, 76, 76, 68,
68, 72, 80, 72, 72, 76, 72, 72, 80, 72, 72, 80, 72, 72, 80, 80, 80, 88, 76, 80, 80, 76, 80, 84, 80, 80, 88, 88, 96, 96,
96, 92, 84, 96, 96, 92, 88, 96, 96, 100, 96, 92, 96, 96, 100, 96, 96, 120, 112, 120, 104, 112, 116, 120, 120, 112, 112, 124, 112, 120,
112, 120, 120, 116, 120, 124, 120, 128, 120, 116, 116, 124, 120, 120, 128, 120, 124, 120, 128, 140, 140, 128, 128, 128, 140, 144, 140, 140, 144, 144,
140, 128, 140, 140, 144, 156, 156, 140, 160, 140, 156, 156, 148, 144, 160, 160, 156, 144, 160, 156, 156, 156, 156, 156, 160, 144, 144, 156, 160, 152,
140, 148, 144, 160, 160, 156, 156, 152, 156, 160, 160, 156, 156, 160, 160, 160, 156, 156, 156, 152, 156, 156, 160, 156, 160, 160, 156, 160, 160, 160,
156, 168, 160, 156, 160, 172, 172, 160, 188, 188, 172, 188, 160, 192, 188, 192, 172, 192, 188, 188, 188, 188, 188, 188, 172, 188, 188, 204, 152, 152,
188, 152, 164, 156, 156, 160, 168, 172, 180, 184, 176, 172, 180, 208, 196, 200, 204, 204, 172, 172, 172, 168, 176, 184, 184, 168, 176, 204, 180, 196,
188, 192, 204, 176, 212, 180, 176, 176, 184, 180, 180, 184, 188, 188, 212, 192, 192, 232, 220, 232, 208, 228, 184, 180, 200, 192, 204, 208, 188, 212,
204, 208, 212, 220, 268, 260, 224, 228, 264, 264, 196, 196, 196, 208, 204, 212, 216, 204, 204, 252, 232, 256, 264, 284, 272, 264, 252, 276, 204, 220,
220, 224, 236, 248, 236, 244, 248, 252, 252, 268, 272, 264, 252, 208, 220, 224, 220, 228, 256, 228, 248, 252, 264, 260, 260, 300, 272, 280, 268, 292,
328, 228, 248, 260, 256, 260, 264, 260, 276, 276, 276, 296, 292, 288, 324, 300, 244, 260, 304, 264, 268, 272, 272, 284, 268, 296, 300, 292, 300, 324,
316, 272, 268, 264, 276, 288, 288, 280, 288, 324, 268, 324, 316, 284, 304, 300, 268, 300, 280, 304, 300, 292, 324, 296, 320, 304, 268, 328, 324, 284,
296, 304, 296, 296, 296, 328, 340, 372, 364, 432, 444, 436, 436, 448, 316, 456, 464, 436, 440, 480, 372, 448, 452, 468, 472, 472, 480, 376, 472, 412,
460, 436, 456, 456, 448, 464, 408, 472, 464, 464, 472, 464, 472, 472, 456, 464, 464, 464, 464, 456, 464, 480, 416, 472, 472, 480, 480, 432, 464, 440,
456, 464, 464, 480, 448, 448, 448, 472, 464, 464, 456, 480, 456, 448, 456, 472, 448, 472, 456, 472, 464, 448, 472, 480, 480, 472, 448, 472, 480, 464,
480, 472, 464, 472, 464, 456, 480, 480, 456, 480, 456, 480, 472, 456, 472, 464, 464, 464, 472, 456, 480, 480, 472, 472, 472, 472, 456, 480, 480, 480,
480, 480, 480, 464, 464, 472, 480, 544, 536, 472, 472, 480, 480, 536, 472, 472, 480, 560, 552, 480, 568, 560, 552, 568, 568, 576, 568, 568, 576, 528,
568, 576, 568, 480, 480, 576, 552, 544, 560, 552, 552, 560, 560, 576, 568, 576, 576, 568, 576, 560, 568, 576, 552, 552, 552, 576, 560, 544, 560, 560,
560, 568, 576, 568, 560, 568, 568, 568, 560, 576, 576, 544, 576, 560, 560, 576, 568, 576, 568, 568, 568, 576, 560, 568, 568, 576, 568, 576, 544, 552,
576, 576, 552, 552, 560, 576, 560, 568, 576, 568, 560, 544, 576, 576, 576, 576, 576, 560, 560, 576, 568, 568, 568, 576, 568, 568, 568, 576, 576, 576,
560, 576, 568, 576, 560, 560, 568, 576, 568, 576, 576, 568, 576, 560, 576, 576, 576, 560, 576, 568, 568, 576, 568, 576, 576, 576, 568, 576, 576, 576,
576, 568, 576, 560, 568, 576, 576, 568, 576, 576, 576, 568, 576, 568, 568, 568, 568, 576, 568, 576, 576, 568, 576, 560, 568, 560, 576, 576, 568, 560,
568, 576, 552, 776, 576, 560, 560, 576, 568, 576, 576, 576, 776, 576, 560, 576, 784, 568, 576, 776, 568, 792, 776, 576, 776, 776, 776, 776, 776, 784,
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18,
18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36,
38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56,
60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76,
88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88,
80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112,
120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140,
144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156,
160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172,
172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188,
188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188,
184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196,
188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264,
284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272,
284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284,
276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324,
300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324,
300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456,
472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456,
480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480,
472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472,
480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560,
576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544,
576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576,
552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560,
568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568,
576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776,
776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776,
792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824,
870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856,
Expand Down
31 changes: 31 additions & 0 deletions src/mpn_extras/mulhigh_basecase.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,18 @@ mp_limb_t flint_mpn_mulhigh_11(nn_ptr r, nn_srcptr x, nn_srcptr y)
{
mp_limb_t w0, w1, lo, w2, cy;

#if 0
w0 = flint_mpn_mulhigh_10(r, x + 1, y);
#else
w0 = flint_mpn_mulhigh_9(r, x + 2, y);
r[9] = mpn_addmul_1(r, x + 2, 9, y[9]);
umul_ppmm(w1, lo, x[1], y[8]);
umul_ppmm(cy, w2, x[1], y[9]);
add_ssaaaa(cy, w0, cy, w0, 0, w1);
add_ssaaaa(cy, w0, cy, w0, 0, w2);
MPN_INCR_U(r, 10, cy);
#endif

r[10] = mpn_addmul_1(r, x + 1, 10, y[10]);
umul_ppmm(w1, lo, x[0], y[9]);
umul_ppmm(cy, w2, x[0], y[10]);
Expand All @@ -51,13 +62,33 @@ mp_limb_t flint_mpn_mulhigh_11(nn_ptr r, nn_srcptr x, nn_srcptr y)
MPN_INCR_U(r, 11, cy);

return w0;

return w0;
}

mp_limb_t flint_mpn_mulhigh_12(nn_ptr r, nn_srcptr x, nn_srcptr y)
{
mp_limb_t w0, w1, lo, w2, cy;

#if 0
w0 = flint_mpn_mulhigh_11(r, x + 1, y);
#else
w0 = flint_mpn_mulhigh_9(r, x + 3, y);
r[9] = mpn_addmul_1(r, x + 3, 9, y[9]);
umul_ppmm(w1, lo, x[2], y[8]);
umul_ppmm(cy, w2, x[2], y[9]);
add_ssaaaa(cy, w0, cy, w0, 0, w1);
add_ssaaaa(cy, w0, cy, w0, 0, w2);
MPN_INCR_U(r, 10, cy);

r[10] = mpn_addmul_1(r, x + 2, 10, y[10]);
umul_ppmm(w1, lo, x[1], y[9]);
umul_ppmm(cy, w2, x[1], y[10]);
add_ssaaaa(cy, w0, cy, w0, 0, w1);
add_ssaaaa(cy, w0, cy, w0, 0, w2);
MPN_INCR_U(r, 11, cy);
#endif

r[11] = mpn_addmul_1(r, x + 1, 11, y[11]);
umul_ppmm(w1, lo, x[0], y[10]);
umul_ppmm(cy, w2, x[0], y[11]);
Expand Down
13 changes: 5 additions & 8 deletions src/mpn_extras/mulhigh_recursive.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,22 @@

#include "mpn_extras.h"

/* Tuned for x86-64 */
#define BEST_BASECASE_N 9

mp_limb_t
_flint_mpn_mulhigh_n_recursive(mp_ptr r, mp_srcptr x, mp_srcptr y, mp_size_t n)
{
if (FLINT_HAVE_MULHIGH_FUNC(n))
{
return flint_mpn_mulhigh_func_tab[n](r, x, y);
}
else if (n <= 2 * BEST_BASECASE_N)
else if (n <= 2 * FLINT_MPN_MULHIGH_BEST_TAB_N)
{
mp_limb_t t[2 * BEST_BASECASE_N];
mp_limb_t t[2 * FLINT_MPN_MULHIGH_BEST_TAB_N];

mp_size_t m1 = n - BEST_BASECASE_N;
mp_size_t m2 = BEST_BASECASE_N;
mp_size_t m1 = n - FLINT_MPN_MULHIGH_BEST_TAB_N;
mp_size_t m2 = FLINT_MPN_MULHIGH_BEST_TAB_N;
mp_limb_t cy, lo, w0, w1, w2;

FLINT_ASSERT(BEST_BASECASE_N <= FLINT_MPN_MULHIGH_FUNC_TAB_WIDTH);
FLINT_ASSERT(FLINT_MPN_MULHIGH_BEST_TAB_N <= FLINT_MPN_MULHIGH_FUNC_TAB_WIDTH);

flint_mpn_mul(r, x + m1, m2, y + m2, m1);
w0 = flint_mpn_mulhigh_n(t, x + m1, y, m2);
Expand Down

0 comments on commit 1209dc2

Please sign in to comment.