Skip to content

Commit

Permalink
include diagonals in sqrhigh_mulders
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrik-johansson committed Dec 11, 2024
1 parent 4a06ecc commit b5676c7
Showing 1 changed file with 22 additions and 11 deletions.
33 changes: 22 additions & 11 deletions src/mpn_extras/sqrhigh.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/

#include <string.h>
#include "mpn_extras.h"

/* Generated by tune-sqrhigh.c */
Expand Down Expand Up @@ -128,25 +129,35 @@ _flint_mpn_sqrhigh_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_size_t n)
/* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
cy = mpn_lshift(rp + l - 1, rp + l - 1, l + 1, 1);
cy += mpn_add_n(rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
mpn_add_1(rp + n + l, rp + n + l, k, cy); /* propagate carry */
MPN_INCR_U(rp + n + l, k, cy); /* propagate carry */

/* Corrections for precise high product (not needed for the
original Mulders). */
{
mp_limb_t hi, lo;

/* Note: if we relax the condition on k, we will need
a branch for k == l here to avoid double counting. */
FLINT_ASSERT(k != l);

umul_ppmm(hi, lo, np[k - 1], np[l - 1]);
MPN_INCR_U(rp + n - 1, n + 1, hi);
MPN_INCR_U(rp + n - 1, n + 1, hi);
}
}

mp_limb_t
_flint_mpn_sqrhigh_mulders(mp_ptr res, mp_srcptr u, mp_size_t n)
{
mp_ptr tmp, tr, tu;
mp_ptr tmp;
mp_limb_t bot;
TMP_INIT;

TMP_START;
tmp = TMP_ALLOC(sizeof(mp_limb_t) * (3 * (n + 1)));
tu = tmp;
tr = tu + (n + 1);
tu[0] = 0;
flint_mpn_copyi(tu + 1, u, n);
_flint_mpn_sqrhigh_mulders_recursive(tr, tu, n + 1);
flint_mpn_copyi(res, tr + n + 2, n);
bot = tr[n + 1];
tmp = TMP_ALLOC(sizeof(mp_limb_t) * (2 * n));
_flint_mpn_sqrhigh_mulders_recursive(tmp, u, n);
memcpy(res, tmp + n, sizeof(mp_limb_t) * n);
bot = tmp[n - 1];

TMP_END;
return bot;
Expand All @@ -159,7 +170,7 @@ _flint_mpn_sqrhigh_sqr(mp_ptr res, mp_srcptr u, mp_size_t n)
mp_limb_t bot;
tmp = flint_malloc(sizeof(mp_limb_t) * (2 * n));
flint_mpn_sqr(tmp, u, n);
flint_mpn_copyi(res, tmp + n, n);
memcpy(res, tmp + n, sizeof(mp_limb_t) * n);
bot = tmp[n - 1];
flint_free(tmp);
return bot;
Expand Down

0 comments on commit b5676c7

Please sign in to comment.