diff --git a/dev/gen_mulhigh_basecase.jl b/dev/gen_mulhigh_basecase.jl
new file mode 100644
index 0000000000..f1f5194650
--- /dev/null
+++ b/dev/gen_mulhigh_basecase.jl
@@ -0,0 +1,403 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+_regs = ["%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rax"]
+__regs = ["%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15"]
+
+function R8(reg::String)
+ if reg == "%rax"
+ return "%al"
+ elseif reg == "%rbx"
+ return "%bl"
+ elseif reg == "%rcx"
+ return "%cl"
+ elseif reg == "%rdx"
+ return "%dl"
+ elseif reg == "%rsp"
+ return "%spl"
+ elseif reg == "%rbp"
+ return "%bpl"
+ elseif reg == "%rsi"
+ return "%sil"
+ elseif reg == "%rdi"
+ return "%dil"
+ elseif reg == "%r8"
+ return "%r8b"
+ elseif reg == "%r9"
+ return "%r9b"
+ elseif reg == "%r10"
+ return "%r10b"
+ elseif reg == "%r11"
+ return "%r11b"
+ elseif reg == "%r12"
+ return "%r12b"
+ elseif reg == "%r13"
+ return "%r13b"
+ elseif reg == "%r14"
+ return "%r14b"
+ elseif reg == "%r15"
+ return "%r15b"
+ else
+ return "hejhoppgummi"
+ end
+end
+
+function R32(reg::String)
+ if reg == "%rax"
+ return "%eax"
+ elseif reg == "%rbx"
+ return "%ebx"
+ elseif reg == "%rcx"
+ return "%ecx"
+ elseif reg == "%rdx"
+ return "%edx"
+ elseif reg == "%rsp"
+ return "%esp"
+ elseif reg == "%rbp"
+ return "%ebp"
+ elseif reg == "%rsi"
+ return "%esi"
+ elseif reg == "%rdi"
+ return "%edi"
+ elseif reg == "%r8"
+ return "%r8d"
+ elseif reg == "%r9"
+ return "%r9d"
+ elseif reg == "%r10"
+ return "%r10d"
+ elseif reg == "%r11"
+ return "%r11d"
+ elseif reg == "%r12"
+ return "%r12d"
+ elseif reg == "%r13"
+ return "%r13d"
+ elseif reg == "%r14"
+ return "%r14d"
+ elseif reg == "%r15"
+ return "%r15d"
+ else
+ return "hejhoppgummi"
+ end
+end
+
+###############################################################################
+# Preamble
+###############################################################################
+
+copyright = "#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#\n"
+
+preamble = "include(`config.m4')dnl\ndnl\n.text\n"
+
+function function_pre_post(funname::String)
+ pre = ".global\tFUNC($funname)
+.p2align\t4, 0x90
+TYPE($funname)
+
+FUNC($funname):
+\t.cfi_startproc\n"
+
+ post = ".$(funname)_end:
+SIZE($(funname), .$(funname)_end)
+.cfi_endproc\n"
+
+ return (pre, post)
+end
+
+###############################################################################
+# mulhigh
+###############################################################################
+
+function mulhigh_1()
+ r0 = _regs[9] # Important that r0 is rax
+ r1 = _regs[4]
+
+ res = _regs[1]
+ ap = _regs[2]
+ bp = _regs[3]
+
+ body = ""
+
+ body *= "\tmov\t0*8($bp), %rdx\n"
+ body *= "\tmulx\t0*8($ap), $r0, $r1\n"
+ body *= "\tmov\t$r1, 0*8($res)\n"
+
+ return body * "\n\tret\n"
+end
+
+function mulhigh_2()
+ r0 = _regs[9]
+ r1 = _regs[4]
+ r2 = _regs[5]
+
+ sc = _regs[6]
+ zr = _regs[7]
+
+ res = _regs[1]
+ ap = _regs[2]
+ bp_old = _regs[3]
+ b1 = _regs[8]
+
+ body = ""
+
+ body *= "\tmov\t1*8($bp_old), $b1\n"
+ body *= "\tmov\t0*8($bp_old), %rdx\n"
+ body *= "\txor\t$(R32(zr)), $(R32(zr))\n"
+ body *= "\tmulx\t0*8($ap), $sc, $r0\n"
+ body *= "\tmulx\t1*8($ap), $sc, $r1\n"
+ body *= "\tadcx\t$sc, $r0\n"
+ body *= "\tadcx\t$zr, $r1\n"
+
+ body *= "\tmov\t$b1, %rdx\n"
+ body *= "\tmulx\t0*8($ap), $sc, $r2\n"
+ body *= "\tadcx\t$sc, $r0\n"
+ body *= "\tadcx\t$r2, $r1\n"
+ body *= "\tmulx\t1*8($ap), $sc, $r2\n"
+ body *= "\tadox\t$sc, $r1\n"
+ body *= "\tadox\t$zr, $r2\n"
+ body *= "\tadcx\t$zr, $r2\n"
+ body *= "\tmov\t$r1, 0*8($res)\n"
+ body *= "\tmov\t$r2, 1*8($res)\n"
+
+ return body * "\n\tret\n"
+end
+
+# When n = 9, push res to stack.
+# When n = 10, push res and rsp to xmm register.
+# When n = 11, do it like n = 10, and also use bp as r(11) as r(11) is only
+# really needed in the last step.
+# When n = 12, do it like n = 11 but do not use a zero register.
+function mulhigh(n::Int; debug::Bool = false)
+ if n < 1
+ error()
+ elseif n == 1
+ return mulhigh_1()
+ elseif n == 2
+ return mulhigh_2()
+ elseif n <= 12
+ # Continue
+ else
+ error()
+ end
+
+ if debug
+ res = "res"
+ ap = "ap"
+ bp_old = "bp_old"
+ bp = "bp"
+ sc = "sc"
+ zr = "zr"
+ else
+ res = _regs[1]
+ ap = _regs[2]
+ bp_old = _regs[3] # rdx
+ bp = _regs[4] # rdx is used by mulx, so we need to switch register for bp
+
+ if n != 12
+ sc = _regs[5] # scrap register
+ zr = (n < 10) ? _regs[6] : "%rsp" # zero
+ else
+ sc = "%rsp"
+ zr = sc
+ end
+
+ if n < 9
+ _r = [_regs[9]; _regs[7:8]; __regs[1:n - 2]]
+ elseif n == 9
+ _r = [_regs[9]; _regs[7:8]; __regs[1:6]; res]
+ elseif n == 10
+ _r = [_regs[9]; _regs[6:8]; __regs[1:6]; res]
+ elseif n == 11
+ _r = [_regs[9]; _regs[6:8]; __regs[1:6]; res; bp]
+ elseif n == 12
+ _r = [_regs[9]; _regs[5:8]; __regs[1:6]; res; bp]
+ end
+ end
+
+ r(ix::Int) = debug ? "r$ix" : _r[ix + 1]
+
+ # Push
+ body = ""
+ for ix in 1:min(n - 2, 6)
+ body *= "\tpush\t$(__regs[ix])\n"
+ end
+ if n == 9
+ body *= "\tpush\t$res\n"
+ elseif n >= 10
+ body *= "\tvmovq\t%rsp, %xmm0\n"
+ body *= "\tvmovq\t$res, %xmm1\n"
+ end
+ body *= "\n"
+
+ # Prepare
+ body *= "\tmov\t$bp_old, $bp\n"
+ body *= "\tmov\t0*8($bp_old), %rdx\n"
+ if n != 12
+ body *= "\txor\t$(R32(zr)), $(R32(zr))\n"
+ end
+ body *= "\n"
+
+ # First multiplication chain
+ body *= "\tmulx\t$(n - 2)*8($ap), $sc, $(r(0))\n"
+ body *= "\tmulx\t$(n - 1)*8($ap), $sc, $(r(1))\n"
+ if n != 12
+ body *= "\tadcx\t$sc, $(r(0))\n"
+ body *= "\tadcx\t$zr, $(r(1))\n"
+ else
+ body *= "\tadd\t$sc, $(r(0))\n"
+ body *= "\tadc\t\$0, $(r(1))\n"
+ body *= "\ttest\t%al, %al\n"
+ end
+ body *= "\n"
+
+ # Intermediate multiplication chains
+ for ix in 1:min(n - 2, (n != 12) ? 8 : 9)
+ body *= "\tmov\t$ix*8($bp), %rdx\n"
+
+ body *= "\tmulx\t$(n - 2 - ix)*8($ap), $sc, $(r(ix + 2))\n"
+ body *= "\tmulx\t$(n - 1 - ix)*8($ap), $sc, $(r(ix + 1))\n"
+ body *= "\tadcx\t$(r(ix + 2)), $(r(0))\n"
+ body *= "\tadox\t$sc, $(r(0))\n"
+ body *= "\tadcx\t$(r(ix + 1)), $(r(1))\n"
+
+ for jx in 1:ix - 1
+ body *= "\tmulx\t$(n - 1 - ix + jx)*8($ap), $sc, $(r(ix + 1))\n"
+ body *= "\tadox\t$sc, $(r(jx + 0))\n"
+ body *= "\tadcx\t$(r(ix + 1)), $(r(jx + 1))\n"
+ end
+
+ body *= "\tmulx\t$(n - 1)*8($ap), $sc, $(r(ix + 1))\n"
+ body *= "\tadox\t$sc, $(r(ix + 0))\n"
+ if n == 12
+ body *= "\tmov\t\$0, $(R32(zr))\n"
+ end
+ body *= "\tadcx\t$zr, $(r(ix + 1))\n"
+ body *= "\tadox\t$zr, $(r(ix + 1))\n"
+
+ body *= "\n"
+ end
+
+ if n >= 11
+ N = n - 1
+ body *= "\tmov\t$(n - 2)*8($bp), %rdx\n"
+ for ix in 0:n - 2
+ body *= "\tmulx\t$ix*8($ap), $sc, $(r(N))\n"
+ if ix == 0
+ body *= "\tadcx\t$(r(N)), $(r(ix + 0))\n"
+ else
+ body *= "\tadox\t$sc, $(r(ix - 1))\n"
+ body *= "\tadcx\t$(r(N)), $(r(ix + 0))\n"
+ end
+ end
+ body *= "\tmulx\t$(n - 1)*8($ap), $sc, $(r(N))\n"
+ body *= "\tadox\t$sc, $(r(N - 1))\n"
+ if n == 12
+ body *= "\tmov\t\$0, $(R32(zr))\n"
+ end
+ body *= "\tadcx\t$zr, $(r(N))\n"
+ body *= "\tadox\t$zr, $(r(N))\n"
+ body *= "\n"
+ end
+
+ # Last multiplication chain
+ body *= "\tmov\t$(n - 1)*8($bp), %rdx\n"
+ for ix in 0:n - 2
+ body *= "\tmulx\t$ix*8($ap), $sc, $(r(n))\n"
+ if ix % 2 == 0
+ body *= "\tadcx\t$sc, $(r(ix + 0))\n"
+ body *= "\tadcx\t$(r(n)), $(r(ix + 1))\n"
+ else
+ body *= "\tadox\t$sc, $(r(ix + 0))\n"
+ body *= "\tadox\t$(r(n)), $(r(ix + 1))\n"
+ end
+ end
+ body *= "\tmulx\t$(n - 1)*8($ap), $sc, $(r(n))\n"
+ if (n - 1) % 2 == 0
+ body *= "\tadcx\t$sc, $(r(n - 1))\n"
+ else
+ body *= "\tadox\t$sc, $(r(n - 1))\n"
+ end
+ if n == 12
+ body *= "\tmov\t\$0, $(R32(zr))\n"
+ end
+ body *= "\tadcx\t$zr, $(r(n))\n"
+ if n == 9
+ # Use scrap register for storing pointer to res
+ res = sc
+ body *= "\tpop\t$res\n"
+ end
+ body *= "\tadox\t$zr, $(r(n))\n"
+ body *= "\n"
+
+ if n == 10 || n == 11
+ res, zr = sc, "error zr"
+ body *= "\tvmovq\t%xmm1, $res\n"
+ body *= "\tvmovq\t%xmm0, %rsp\n"
+ elseif n == 12
+ res = sc
+ body *= "\tvmovq\t%xmm1, $res\n"
+ end
+
+ # Store result
+ for ix in 1:n
+ body *= "\tmov\t$(r(ix)), $(ix - 1)*8($res)\n"
+ end
+ body *= "\n"
+
+ # Pop
+ if n == 12
+ body *= "\tvmovq\t%xmm0, %rsp\n"
+ end
+ for ix in min(n - 2, 6):-1:1
+ body *= "\tpop\t$(__regs[ix])\n"
+ end
+ body *= "\n"
+
+ if debug
+ print(body * "\tret\n")
+ else
+ return body * "\tret\n"
+ end
+end
+
+###############################################################################
+# Generate file
+###############################################################################
+
+function gen_mulhigh(m::Int, nofile::Bool = false)
+ (pre, post) = function_pre_post("flint_mpn_mulhigh_$m")
+ functionbody = mulhigh(m)
+
+ str = "$copyright\n$preamble\n$pre$functionbody$post"
+
+ if nofile
+ print(str)
+ else
+ path = String(@__DIR__) * "/../src/mpn_extras/broadwell/mulhigh_$m.asm"
+ file = open(path, "w")
+ write(file, str)
+ close(file)
+ end
+end
+
+function gen_all()
+ for m in 1:12
+ gen_mulhigh(m)
+ end
+end
diff --git a/doc/source/mpn_extras.rst b/doc/source/mpn_extras.rst
index 2b031c3a7d..dfc61a9b5a 100644
--- a/doc/source/mpn_extras.rst
+++ b/doc/source/mpn_extras.rst
@@ -69,6 +69,17 @@ Multiplication
Return the normalized length of `y` if `y \ge 0` and `y` fits into `n` limbs. Otherwise, return `-1`.
`y` may alias `x_1` but is not allowed to alias `x_2`.
+.. function:: mp_limb_t flint_mpn_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+
+ Sets *{rp, n}* to the "high part" of *{xp, n}* times *{yp, n}* and returns
+ the *n*-th least significant limb, in the sense that the multiplication of
+ the *n - 1* lower limbs are never accounted for. Hence, the multiplication
+ is typically non-exact for sizes larger than one. The highest error
+ is *n + 2* ULP in the returned limb.
+
+ .. note:: This function may not exist on processors not supporting the ADX
+ instruction set.
+
Divisibility
--------------------------------------------------------------------------------
diff --git a/src/mpn_extras.h b/src/mpn_extras.h
index cf9ffe6f8a..7594cbe6b2 100644
--- a/src/mpn_extras.h
+++ b/src/mpn_extras.h
@@ -229,6 +229,27 @@ flint_mpn_sqr(mp_ptr r, mp_srcptr x, mp_size_t n)
flint_mpn_mul((_z), (_y), (_yn), (_x), (_xn)); \
}
+#if FLINT_HAVE_ADX
+# define FLINT_MPN_MULHIGH_N_FUNC_TAB_WIDTH 12
+#else
+# define FLINT_MPN_MULHIGH_N_FUNC_TAB_WIDTH 0
+#endif
+
+#define FLINT_HAVE_MULHIGH_N_FUNC(n) ((n) <= FLINT_MPN_MULHIGH_N_FUNC_TAB_WIDTH)
+
+FLINT_DLL extern const flint_mpn_mul_func_t flint_mpn_mulhigh_n_func_tab[];
+
+/* NOTE: Aliasing is allowed! */
+/* FIXME: How do we proceed for bigger n? */
+MPN_EXTRAS_INLINE
+mp_limb_t flint_mpn_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{
+ FLINT_ASSERT(n >= 1);
+ FLINT_ASSERT(FLINT_HAVE_MULHIGH_N_FUNC(n));
+
+ return flint_mpn_mulhigh_n_func_tab[n - 1](rp, xp, yp);
+}
+
/*
return the high limb of a two limb left shift by n < GMP_LIMB_BITS bits.
Note: if GMP_NAIL_BITS != 0, the rest of flint is already broken anyways.
diff --git a/src/mpn_extras/broadwell/mulhigh_1.asm b/src/mpn_extras/broadwell/mulhigh_1.asm
new file mode 100644
index 0000000000..4bd024e3c7
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_1.asm
@@ -0,0 +1,29 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_1)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_1)
+
+FUNC(flint_mpn_mulhigh_1):
+ .cfi_startproc
+ mov 0*8(%rdx), %rdx
+ mulx 0*8(%rsi), %rax, %rcx
+ mov %rcx, 0*8(%rdi)
+
+ ret
+.flint_mpn_mulhigh_1_end:
+SIZE(flint_mpn_mulhigh_1, .flint_mpn_mulhigh_1_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_10.asm b/src/mpn_extras/broadwell/mulhigh_10.asm
new file mode 100644
index 0000000000..d92c3d0699
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_10.asm
@@ -0,0 +1,268 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_10)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_10)
+
+FUNC(flint_mpn_mulhigh_10):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ vmovq %rsp, %xmm0
+ vmovq %rdi, %xmm1
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %esp, %esp
+
+ mulx 8*8(%rsi), %r8, %rax
+ mulx 9*8(%rsi), %r8, %r9
+ adcx %r8, %rax
+ adcx %rsp, %r9
+
+ mov 1*8(%rcx), %rdx
+ mulx 7*8(%rsi), %r8, %r11
+ mulx 8*8(%rsi), %r8, %r10
+ adcx %r11, %rax
+ adox %r8, %rax
+ adcx %r10, %r9
+ mulx 9*8(%rsi), %r8, %r10
+ adox %r8, %r9
+ adcx %rsp, %r10
+ adox %rsp, %r10
+
+ mov 2*8(%rcx), %rdx
+ mulx 6*8(%rsi), %r8, %rbx
+ mulx 7*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r9
+ mulx 8*8(%rsi), %r8, %r11
+ adox %r8, %r9
+ adcx %r11, %r10
+ mulx 9*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %rsp, %r11
+ adox %rsp, %r11
+
+ mov 3*8(%rcx), %rdx
+ mulx 5*8(%rsi), %r8, %rbp
+ mulx 6*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r9
+ mulx 7*8(%rsi), %r8, %rbx
+ adox %r8, %r9
+ adcx %rbx, %r10
+ mulx 8*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 9*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %rsp, %rbx
+ adox %rsp, %rbx
+
+ mov 4*8(%rcx), %rdx
+ mulx 4*8(%rsi), %r8, %r12
+ mulx 5*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r9
+ mulx 6*8(%rsi), %r8, %rbp
+ adox %r8, %r9
+ adcx %rbp, %r10
+ mulx 7*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 8*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 9*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %rsp, %rbp
+ adox %rsp, %rbp
+
+ mov 5*8(%rcx), %rdx
+ mulx 3*8(%rsi), %r8, %r13
+ mulx 4*8(%rsi), %r8, %r12
+ adcx %r13, %rax
+ adox %r8, %rax
+ adcx %r12, %r9
+ mulx 5*8(%rsi), %r8, %r12
+ adox %r8, %r9
+ adcx %r12, %r10
+ mulx 6*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adcx %r12, %r11
+ mulx 7*8(%rsi), %r8, %r12
+ adox %r8, %r11
+ adcx %r12, %rbx
+ mulx 8*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adcx %r12, %rbp
+ mulx 9*8(%rsi), %r8, %r12
+ adox %r8, %rbp
+ adcx %rsp, %r12
+ adox %rsp, %r12
+
+ mov 6*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %r14
+ mulx 3*8(%rsi), %r8, %r13
+ adcx %r14, %rax
+ adox %r8, %rax
+ adcx %r13, %r9
+ mulx 4*8(%rsi), %r8, %r13
+ adox %r8, %r9
+ adcx %r13, %r10
+ mulx 5*8(%rsi), %r8, %r13
+ adox %r8, %r10
+ adcx %r13, %r11
+ mulx 6*8(%rsi), %r8, %r13
+ adox %r8, %r11
+ adcx %r13, %rbx
+ mulx 7*8(%rsi), %r8, %r13
+ adox %r8, %rbx
+ adcx %r13, %rbp
+ mulx 8*8(%rsi), %r8, %r13
+ adox %r8, %rbp
+ adcx %r13, %r12
+ mulx 9*8(%rsi), %r8, %r13
+ adox %r8, %r12
+ adcx %rsp, %r13
+ adox %rsp, %r13
+
+ mov 7*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %r15
+ mulx 2*8(%rsi), %r8, %r14
+ adcx %r15, %rax
+ adox %r8, %rax
+ adcx %r14, %r9
+ mulx 3*8(%rsi), %r8, %r14
+ adox %r8, %r9
+ adcx %r14, %r10
+ mulx 4*8(%rsi), %r8, %r14
+ adox %r8, %r10
+ adcx %r14, %r11
+ mulx 5*8(%rsi), %r8, %r14
+ adox %r8, %r11
+ adcx %r14, %rbx
+ mulx 6*8(%rsi), %r8, %r14
+ adox %r8, %rbx
+ adcx %r14, %rbp
+ mulx 7*8(%rsi), %r8, %r14
+ adox %r8, %rbp
+ adcx %r14, %r12
+ mulx 8*8(%rsi), %r8, %r14
+ adox %r8, %r12
+ adcx %r14, %r13
+ mulx 9*8(%rsi), %r8, %r14
+ adox %r8, %r13
+ adcx %rsp, %r14
+ adox %rsp, %r14
+
+ mov 8*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rdi
+ mulx 1*8(%rsi), %r8, %r15
+ adcx %rdi, %rax
+ adox %r8, %rax
+ adcx %r15, %r9
+ mulx 2*8(%rsi), %r8, %r15
+ adox %r8, %r9
+ adcx %r15, %r10
+ mulx 3*8(%rsi), %r8, %r15
+ adox %r8, %r10
+ adcx %r15, %r11
+ mulx 4*8(%rsi), %r8, %r15
+ adox %r8, %r11
+ adcx %r15, %rbx
+ mulx 5*8(%rsi), %r8, %r15
+ adox %r8, %rbx
+ adcx %r15, %rbp
+ mulx 6*8(%rsi), %r8, %r15
+ adox %r8, %rbp
+ adcx %r15, %r12
+ mulx 7*8(%rsi), %r8, %r15
+ adox %r8, %r12
+ adcx %r15, %r13
+ mulx 8*8(%rsi), %r8, %r15
+ adox %r8, %r13
+ adcx %r15, %r14
+ mulx 9*8(%rsi), %r8, %r15
+ adox %r8, %r14
+ adcx %rsp, %r15
+ adox %rsp, %r15
+
+ mov 9*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rdi
+ adcx %r8, %rax
+ adcx %rdi, %r9
+ mulx 1*8(%rsi), %r8, %rdi
+ adox %r8, %r9
+ adox %rdi, %r10
+ mulx 2*8(%rsi), %r8, %rdi
+ adcx %r8, %r10
+ adcx %rdi, %r11
+ mulx 3*8(%rsi), %r8, %rdi
+ adox %r8, %r11
+ adox %rdi, %rbx
+ mulx 4*8(%rsi), %r8, %rdi
+ adcx %r8, %rbx
+ adcx %rdi, %rbp
+ mulx 5*8(%rsi), %r8, %rdi
+ adox %r8, %rbp
+ adox %rdi, %r12
+ mulx 6*8(%rsi), %r8, %rdi
+ adcx %r8, %r12
+ adcx %rdi, %r13
+ mulx 7*8(%rsi), %r8, %rdi
+ adox %r8, %r13
+ adox %rdi, %r14
+ mulx 8*8(%rsi), %r8, %rdi
+ adcx %r8, %r14
+ adcx %rdi, %r15
+ mulx 9*8(%rsi), %r8, %rdi
+ adox %r8, %r15
+ adcx %rsp, %rdi
+ adox %rsp, %rdi
+
+ vmovq %xmm1, %r8
+ vmovq %xmm0, %rsp
+ mov %r9, 0*8(%r8)
+ mov %r10, 1*8(%r8)
+ mov %r11, 2*8(%r8)
+ mov %rbx, 3*8(%r8)
+ mov %rbp, 4*8(%r8)
+ mov %r12, 5*8(%r8)
+ mov %r13, 6*8(%r8)
+ mov %r14, 7*8(%r8)
+ mov %r15, 8*8(%r8)
+ mov %rdi, 9*8(%r8)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_10_end:
+SIZE(flint_mpn_mulhigh_10, .flint_mpn_mulhigh_10_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_11.asm b/src/mpn_extras/broadwell/mulhigh_11.asm
new file mode 100644
index 0000000000..aced9621d6
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_11.asm
@@ -0,0 +1,307 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_11)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_11)
+
+FUNC(flint_mpn_mulhigh_11):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ vmovq %rsp, %xmm0
+ vmovq %rdi, %xmm1
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %esp, %esp
+
+ mulx 9*8(%rsi), %r8, %rax
+ mulx 10*8(%rsi), %r8, %r9
+ adcx %r8, %rax
+ adcx %rsp, %r9
+
+ mov 1*8(%rcx), %rdx
+ mulx 8*8(%rsi), %r8, %r11
+ mulx 9*8(%rsi), %r8, %r10
+ adcx %r11, %rax
+ adox %r8, %rax
+ adcx %r10, %r9
+ mulx 10*8(%rsi), %r8, %r10
+ adox %r8, %r9
+ adcx %rsp, %r10
+ adox %rsp, %r10
+
+ mov 2*8(%rcx), %rdx
+ mulx 7*8(%rsi), %r8, %rbx
+ mulx 8*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r9
+ mulx 9*8(%rsi), %r8, %r11
+ adox %r8, %r9
+ adcx %r11, %r10
+ mulx 10*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %rsp, %r11
+ adox %rsp, %r11
+
+ mov 3*8(%rcx), %rdx
+ mulx 6*8(%rsi), %r8, %rbp
+ mulx 7*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r9
+ mulx 8*8(%rsi), %r8, %rbx
+ adox %r8, %r9
+ adcx %rbx, %r10
+ mulx 9*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 10*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %rsp, %rbx
+ adox %rsp, %rbx
+
+ mov 4*8(%rcx), %rdx
+ mulx 5*8(%rsi), %r8, %r12
+ mulx 6*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r9
+ mulx 7*8(%rsi), %r8, %rbp
+ adox %r8, %r9
+ adcx %rbp, %r10
+ mulx 8*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 9*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 10*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %rsp, %rbp
+ adox %rsp, %rbp
+
+ mov 5*8(%rcx), %rdx
+ mulx 4*8(%rsi), %r8, %r13
+ mulx 5*8(%rsi), %r8, %r12
+ adcx %r13, %rax
+ adox %r8, %rax
+ adcx %r12, %r9
+ mulx 6*8(%rsi), %r8, %r12
+ adox %r8, %r9
+ adcx %r12, %r10
+ mulx 7*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adcx %r12, %r11
+ mulx 8*8(%rsi), %r8, %r12
+ adox %r8, %r11
+ adcx %r12, %rbx
+ mulx 9*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adcx %r12, %rbp
+ mulx 10*8(%rsi), %r8, %r12
+ adox %r8, %rbp
+ adcx %rsp, %r12
+ adox %rsp, %r12
+
+ mov 6*8(%rcx), %rdx
+ mulx 3*8(%rsi), %r8, %r14
+ mulx 4*8(%rsi), %r8, %r13
+ adcx %r14, %rax
+ adox %r8, %rax
+ adcx %r13, %r9
+ mulx 5*8(%rsi), %r8, %r13
+ adox %r8, %r9
+ adcx %r13, %r10
+ mulx 6*8(%rsi), %r8, %r13
+ adox %r8, %r10
+ adcx %r13, %r11
+ mulx 7*8(%rsi), %r8, %r13
+ adox %r8, %r11
+ adcx %r13, %rbx
+ mulx 8*8(%rsi), %r8, %r13
+ adox %r8, %rbx
+ adcx %r13, %rbp
+ mulx 9*8(%rsi), %r8, %r13
+ adox %r8, %rbp
+ adcx %r13, %r12
+ mulx 10*8(%rsi), %r8, %r13
+ adox %r8, %r12
+ adcx %rsp, %r13
+ adox %rsp, %r13
+
+ mov 7*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %r15
+ mulx 3*8(%rsi), %r8, %r14
+ adcx %r15, %rax
+ adox %r8, %rax
+ adcx %r14, %r9
+ mulx 4*8(%rsi), %r8, %r14
+ adox %r8, %r9
+ adcx %r14, %r10
+ mulx 5*8(%rsi), %r8, %r14
+ adox %r8, %r10
+ adcx %r14, %r11
+ mulx 6*8(%rsi), %r8, %r14
+ adox %r8, %r11
+ adcx %r14, %rbx
+ mulx 7*8(%rsi), %r8, %r14
+ adox %r8, %rbx
+ adcx %r14, %rbp
+ mulx 8*8(%rsi), %r8, %r14
+ adox %r8, %rbp
+ adcx %r14, %r12
+ mulx 9*8(%rsi), %r8, %r14
+ adox %r8, %r12
+ adcx %r14, %r13
+ mulx 10*8(%rsi), %r8, %r14
+ adox %r8, %r13
+ adcx %rsp, %r14
+ adox %rsp, %r14
+
+ mov 8*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %rdi
+ mulx 2*8(%rsi), %r8, %r15
+ adcx %rdi, %rax
+ adox %r8, %rax
+ adcx %r15, %r9
+ mulx 3*8(%rsi), %r8, %r15
+ adox %r8, %r9
+ adcx %r15, %r10
+ mulx 4*8(%rsi), %r8, %r15
+ adox %r8, %r10
+ adcx %r15, %r11
+ mulx 5*8(%rsi), %r8, %r15
+ adox %r8, %r11
+ adcx %r15, %rbx
+ mulx 6*8(%rsi), %r8, %r15
+ adox %r8, %rbx
+ adcx %r15, %rbp
+ mulx 7*8(%rsi), %r8, %r15
+ adox %r8, %rbp
+ adcx %r15, %r12
+ mulx 8*8(%rsi), %r8, %r15
+ adox %r8, %r12
+ adcx %r15, %r13
+ mulx 9*8(%rsi), %r8, %r15
+ adox %r8, %r13
+ adcx %r15, %r14
+ mulx 10*8(%rsi), %r8, %r15
+ adox %r8, %r14
+ adcx %rsp, %r15
+ adox %rsp, %r15
+
+ mov 9*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rdi
+ adcx %rdi, %rax
+ mulx 1*8(%rsi), %r8, %rdi
+ adox %r8, %rax
+ adcx %rdi, %r9
+ mulx 2*8(%rsi), %r8, %rdi
+ adox %r8, %r9
+ adcx %rdi, %r10
+ mulx 3*8(%rsi), %r8, %rdi
+ adox %r8, %r10
+ adcx %rdi, %r11
+ mulx 4*8(%rsi), %r8, %rdi
+ adox %r8, %r11
+ adcx %rdi, %rbx
+ mulx 5*8(%rsi), %r8, %rdi
+ adox %r8, %rbx
+ adcx %rdi, %rbp
+ mulx 6*8(%rsi), %r8, %rdi
+ adox %r8, %rbp
+ adcx %rdi, %r12
+ mulx 7*8(%rsi), %r8, %rdi
+ adox %r8, %r12
+ adcx %rdi, %r13
+ mulx 8*8(%rsi), %r8, %rdi
+ adox %r8, %r13
+ adcx %rdi, %r14
+ mulx 9*8(%rsi), %r8, %rdi
+ adox %r8, %r14
+ adcx %rdi, %r15
+ mulx 10*8(%rsi), %r8, %rdi
+ adox %r8, %r15
+ adcx %rsp, %rdi
+ adox %rsp, %rdi
+
+ mov 10*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rcx
+ adcx %r8, %rax
+ adcx %rcx, %r9
+ mulx 1*8(%rsi), %r8, %rcx
+ adox %r8, %r9
+ adox %rcx, %r10
+ mulx 2*8(%rsi), %r8, %rcx
+ adcx %r8, %r10
+ adcx %rcx, %r11
+ mulx 3*8(%rsi), %r8, %rcx
+ adox %r8, %r11
+ adox %rcx, %rbx
+ mulx 4*8(%rsi), %r8, %rcx
+ adcx %r8, %rbx
+ adcx %rcx, %rbp
+ mulx 5*8(%rsi), %r8, %rcx
+ adox %r8, %rbp
+ adox %rcx, %r12
+ mulx 6*8(%rsi), %r8, %rcx
+ adcx %r8, %r12
+ adcx %rcx, %r13
+ mulx 7*8(%rsi), %r8, %rcx
+ adox %r8, %r13
+ adox %rcx, %r14
+ mulx 8*8(%rsi), %r8, %rcx
+ adcx %r8, %r14
+ adcx %rcx, %r15
+ mulx 9*8(%rsi), %r8, %rcx
+ adox %r8, %r15
+ adox %rcx, %rdi
+ mulx 10*8(%rsi), %r8, %rcx
+ adcx %r8, %rdi
+ adcx %rsp, %rcx
+ adox %rsp, %rcx
+
+ vmovq %xmm1, %r8
+ vmovq %xmm0, %rsp
+ mov %r9, 0*8(%r8)
+ mov %r10, 1*8(%r8)
+ mov %r11, 2*8(%r8)
+ mov %rbx, 3*8(%r8)
+ mov %rbp, 4*8(%r8)
+ mov %r12, 5*8(%r8)
+ mov %r13, 6*8(%r8)
+ mov %r14, 7*8(%r8)
+ mov %r15, 8*8(%r8)
+ mov %rdi, 9*8(%r8)
+ mov %rcx, 10*8(%r8)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_11_end:
+SIZE(flint_mpn_mulhigh_11, .flint_mpn_mulhigh_11_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_12.asm b/src/mpn_extras/broadwell/mulhigh_12.asm
new file mode 100644
index 0000000000..bc99365cbb
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_12.asm
@@ -0,0 +1,360 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_12)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_12)
+
+FUNC(flint_mpn_mulhigh_12):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ vmovq %rsp, %xmm0
+ vmovq %rdi, %xmm1
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+
+ mulx 10*8(%rsi), %rsp, %rax
+ mulx 11*8(%rsi), %rsp, %r8
+ add %rsp, %rax
+ adc $0, %r8
+ test %al, %al
+
+ mov 1*8(%rcx), %rdx
+ mulx 9*8(%rsi), %rsp, %r10
+ mulx 10*8(%rsi), %rsp, %r9
+ adcx %r10, %rax
+ adox %rsp, %rax
+ adcx %r9, %r8
+ mulx 11*8(%rsi), %rsp, %r9
+ adox %rsp, %r8
+ mov $0, %esp
+ adcx %rsp, %r9
+ adox %rsp, %r9
+
+ mov 2*8(%rcx), %rdx
+ mulx 8*8(%rsi), %rsp, %r11
+ mulx 9*8(%rsi), %rsp, %r10
+ adcx %r11, %rax
+ adox %rsp, %rax
+ adcx %r10, %r8
+ mulx 10*8(%rsi), %rsp, %r10
+ adox %rsp, %r8
+ adcx %r10, %r9
+ mulx 11*8(%rsi), %rsp, %r10
+ adox %rsp, %r9
+ mov $0, %esp
+ adcx %rsp, %r10
+ adox %rsp, %r10
+
+ mov 3*8(%rcx), %rdx
+ mulx 7*8(%rsi), %rsp, %rbx
+ mulx 8*8(%rsi), %rsp, %r11
+ adcx %rbx, %rax
+ adox %rsp, %rax
+ adcx %r11, %r8
+ mulx 9*8(%rsi), %rsp, %r11
+ adox %rsp, %r8
+ adcx %r11, %r9
+ mulx 10*8(%rsi), %rsp, %r11
+ adox %rsp, %r9
+ adcx %r11, %r10
+ mulx 11*8(%rsi), %rsp, %r11
+ adox %rsp, %r10
+ mov $0, %esp
+ adcx %rsp, %r11
+ adox %rsp, %r11
+
+ mov 4*8(%rcx), %rdx
+ mulx 6*8(%rsi), %rsp, %rbp
+ mulx 7*8(%rsi), %rsp, %rbx
+ adcx %rbp, %rax
+ adox %rsp, %rax
+ adcx %rbx, %r8
+ mulx 8*8(%rsi), %rsp, %rbx
+ adox %rsp, %r8
+ adcx %rbx, %r9
+ mulx 9*8(%rsi), %rsp, %rbx
+ adox %rsp, %r9
+ adcx %rbx, %r10
+ mulx 10*8(%rsi), %rsp, %rbx
+ adox %rsp, %r10
+ adcx %rbx, %r11
+ mulx 11*8(%rsi), %rsp, %rbx
+ adox %rsp, %r11
+ mov $0, %esp
+ adcx %rsp, %rbx
+ adox %rsp, %rbx
+
+ mov 5*8(%rcx), %rdx
+ mulx 5*8(%rsi), %rsp, %r12
+ mulx 6*8(%rsi), %rsp, %rbp
+ adcx %r12, %rax
+ adox %rsp, %rax
+ adcx %rbp, %r8
+ mulx 7*8(%rsi), %rsp, %rbp
+ adox %rsp, %r8
+ adcx %rbp, %r9
+ mulx 8*8(%rsi), %rsp, %rbp
+ adox %rsp, %r9
+ adcx %rbp, %r10
+ mulx 9*8(%rsi), %rsp, %rbp
+ adox %rsp, %r10
+ adcx %rbp, %r11
+ mulx 10*8(%rsi), %rsp, %rbp
+ adox %rsp, %r11
+ adcx %rbp, %rbx
+ mulx 11*8(%rsi), %rsp, %rbp
+ adox %rsp, %rbx
+ mov $0, %esp
+ adcx %rsp, %rbp
+ adox %rsp, %rbp
+
+ mov 6*8(%rcx), %rdx
+ mulx 4*8(%rsi), %rsp, %r13
+ mulx 5*8(%rsi), %rsp, %r12
+ adcx %r13, %rax
+ adox %rsp, %rax
+ adcx %r12, %r8
+ mulx 6*8(%rsi), %rsp, %r12
+ adox %rsp, %r8
+ adcx %r12, %r9
+ mulx 7*8(%rsi), %rsp, %r12
+ adox %rsp, %r9
+ adcx %r12, %r10
+ mulx 8*8(%rsi), %rsp, %r12
+ adox %rsp, %r10
+ adcx %r12, %r11
+ mulx 9*8(%rsi), %rsp, %r12
+ adox %rsp, %r11
+ adcx %r12, %rbx
+ mulx 10*8(%rsi), %rsp, %r12
+ adox %rsp, %rbx
+ adcx %r12, %rbp
+ mulx 11*8(%rsi), %rsp, %r12
+ adox %rsp, %rbp
+ mov $0, %esp
+ adcx %rsp, %r12
+ adox %rsp, %r12
+
+ mov 7*8(%rcx), %rdx
+ mulx 3*8(%rsi), %rsp, %r14
+ mulx 4*8(%rsi), %rsp, %r13
+ adcx %r14, %rax
+ adox %rsp, %rax
+ adcx %r13, %r8
+ mulx 5*8(%rsi), %rsp, %r13
+ adox %rsp, %r8
+ adcx %r13, %r9
+ mulx 6*8(%rsi), %rsp, %r13
+ adox %rsp, %r9
+ adcx %r13, %r10
+ mulx 7*8(%rsi), %rsp, %r13
+ adox %rsp, %r10
+ adcx %r13, %r11
+ mulx 8*8(%rsi), %rsp, %r13
+ adox %rsp, %r11
+ adcx %r13, %rbx
+ mulx 9*8(%rsi), %rsp, %r13
+ adox %rsp, %rbx
+ adcx %r13, %rbp
+ mulx 10*8(%rsi), %rsp, %r13
+ adox %rsp, %rbp
+ adcx %r13, %r12
+ mulx 11*8(%rsi), %rsp, %r13
+ adox %rsp, %r12
+ mov $0, %esp
+ adcx %rsp, %r13
+ adox %rsp, %r13
+
+ mov 8*8(%rcx), %rdx
+ mulx 2*8(%rsi), %rsp, %r15
+ mulx 3*8(%rsi), %rsp, %r14
+ adcx %r15, %rax
+ adox %rsp, %rax
+ adcx %r14, %r8
+ mulx 4*8(%rsi), %rsp, %r14
+ adox %rsp, %r8
+ adcx %r14, %r9
+ mulx 5*8(%rsi), %rsp, %r14
+ adox %rsp, %r9
+ adcx %r14, %r10
+ mulx 6*8(%rsi), %rsp, %r14
+ adox %rsp, %r10
+ adcx %r14, %r11
+ mulx 7*8(%rsi), %rsp, %r14
+ adox %rsp, %r11
+ adcx %r14, %rbx
+ mulx 8*8(%rsi), %rsp, %r14
+ adox %rsp, %rbx
+ adcx %r14, %rbp
+ mulx 9*8(%rsi), %rsp, %r14
+ adox %rsp, %rbp
+ adcx %r14, %r12
+ mulx 10*8(%rsi), %rsp, %r14
+ adox %rsp, %r12
+ adcx %r14, %r13
+ mulx 11*8(%rsi), %rsp, %r14
+ adox %rsp, %r13
+ mov $0, %esp
+ adcx %rsp, %r14
+ adox %rsp, %r14
+
+ mov 9*8(%rcx), %rdx
+ mulx 1*8(%rsi), %rsp, %rdi
+ mulx 2*8(%rsi), %rsp, %r15
+ adcx %rdi, %rax
+ adox %rsp, %rax
+ adcx %r15, %r8
+ mulx 3*8(%rsi), %rsp, %r15
+ adox %rsp, %r8
+ adcx %r15, %r9
+ mulx 4*8(%rsi), %rsp, %r15
+ adox %rsp, %r9
+ adcx %r15, %r10
+ mulx 5*8(%rsi), %rsp, %r15
+ adox %rsp, %r10
+ adcx %r15, %r11
+ mulx 6*8(%rsi), %rsp, %r15
+ adox %rsp, %r11
+ adcx %r15, %rbx
+ mulx 7*8(%rsi), %rsp, %r15
+ adox %rsp, %rbx
+ adcx %r15, %rbp
+ mulx 8*8(%rsi), %rsp, %r15
+ adox %rsp, %rbp
+ adcx %r15, %r12
+ mulx 9*8(%rsi), %rsp, %r15
+ adox %rsp, %r12
+ adcx %r15, %r13
+ mulx 10*8(%rsi), %rsp, %r15
+ adox %rsp, %r13
+ adcx %r15, %r14
+ mulx 11*8(%rsi), %rsp, %r15
+ adox %rsp, %r14
+ mov $0, %esp
+ adcx %rsp, %r15
+ adox %rsp, %r15
+
+ mov 10*8(%rcx), %rdx
+ mulx 0*8(%rsi), %rsp, %rdi
+ adcx %rdi, %rax
+ mulx 1*8(%rsi), %rsp, %rdi
+ adox %rsp, %rax
+ adcx %rdi, %r8
+ mulx 2*8(%rsi), %rsp, %rdi
+ adox %rsp, %r8
+ adcx %rdi, %r9
+ mulx 3*8(%rsi), %rsp, %rdi
+ adox %rsp, %r9
+ adcx %rdi, %r10
+ mulx 4*8(%rsi), %rsp, %rdi
+ adox %rsp, %r10
+ adcx %rdi, %r11
+ mulx 5*8(%rsi), %rsp, %rdi
+ adox %rsp, %r11
+ adcx %rdi, %rbx
+ mulx 6*8(%rsi), %rsp, %rdi
+ adox %rsp, %rbx
+ adcx %rdi, %rbp
+ mulx 7*8(%rsi), %rsp, %rdi
+ adox %rsp, %rbp
+ adcx %rdi, %r12
+ mulx 8*8(%rsi), %rsp, %rdi
+ adox %rsp, %r12
+ adcx %rdi, %r13
+ mulx 9*8(%rsi), %rsp, %rdi
+ adox %rsp, %r13
+ adcx %rdi, %r14
+ mulx 10*8(%rsi), %rsp, %rdi
+ adox %rsp, %r14
+ adcx %rdi, %r15
+ mulx 11*8(%rsi), %rsp, %rdi
+ adox %rsp, %r15
+ mov $0, %esp
+ adcx %rsp, %rdi
+ adox %rsp, %rdi
+
+ mov 11*8(%rcx), %rdx
+ mulx 0*8(%rsi), %rsp, %rcx
+ adcx %rsp, %rax
+ adcx %rcx, %r8
+ mulx 1*8(%rsi), %rsp, %rcx
+ adox %rsp, %r8
+ adox %rcx, %r9
+ mulx 2*8(%rsi), %rsp, %rcx
+ adcx %rsp, %r9
+ adcx %rcx, %r10
+ mulx 3*8(%rsi), %rsp, %rcx
+ adox %rsp, %r10
+ adox %rcx, %r11
+ mulx 4*8(%rsi), %rsp, %rcx
+ adcx %rsp, %r11
+ adcx %rcx, %rbx
+ mulx 5*8(%rsi), %rsp, %rcx
+ adox %rsp, %rbx
+ adox %rcx, %rbp
+ mulx 6*8(%rsi), %rsp, %rcx
+ adcx %rsp, %rbp
+ adcx %rcx, %r12
+ mulx 7*8(%rsi), %rsp, %rcx
+ adox %rsp, %r12
+ adox %rcx, %r13
+ mulx 8*8(%rsi), %rsp, %rcx
+ adcx %rsp, %r13
+ adcx %rcx, %r14
+ mulx 9*8(%rsi), %rsp, %rcx
+ adox %rsp, %r14
+ adox %rcx, %r15
+ mulx 10*8(%rsi), %rsp, %rcx
+ adcx %rsp, %r15
+ adcx %rcx, %rdi
+ mulx 11*8(%rsi), %rsp, %rcx
+ adox %rsp, %rdi
+ mov $0, %esp
+ adcx %rsp, %rcx
+ adox %rsp, %rcx
+
+ vmovq %xmm1, %rsp
+ mov %r8, 0*8(%rsp)
+ mov %r9, 1*8(%rsp)
+ mov %r10, 2*8(%rsp)
+ mov %r11, 3*8(%rsp)
+ mov %rbx, 4*8(%rsp)
+ mov %rbp, 5*8(%rsp)
+ mov %r12, 6*8(%rsp)
+ mov %r13, 7*8(%rsp)
+ mov %r14, 8*8(%rsp)
+ mov %r15, 9*8(%rsp)
+ mov %rdi, 10*8(%rsp)
+ mov %rcx, 11*8(%rsp)
+
+ vmovq %xmm0, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_12_end:
+SIZE(flint_mpn_mulhigh_12, .flint_mpn_mulhigh_12_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_2.asm b/src/mpn_extras/broadwell/mulhigh_2.asm
new file mode 100644
index 0000000000..2ebbfa0d35
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_2.asm
@@ -0,0 +1,43 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_2)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_2)
+
+FUNC(flint_mpn_mulhigh_2):
+ .cfi_startproc
+ mov 1*8(%rdx), %r11
+ mov 0*8(%rdx), %rdx
+ xor %r10d, %r10d
+ mulx 0*8(%rsi), %r9, %rax
+ mulx 1*8(%rsi), %r9, %rcx
+ adcx %r9, %rax
+ adcx %r10, %rcx
+ mov %r11, %rdx
+ mulx 0*8(%rsi), %r9, %r8
+ adcx %r9, %rax
+ adcx %r8, %rcx
+ mulx 1*8(%rsi), %r9, %r8
+ adox %r9, %rcx
+ adox %r10, %r8
+ adcx %r10, %r8
+ mov %rcx, 0*8(%rdi)
+ mov %r8, 1*8(%rdi)
+
+ ret
+.flint_mpn_mulhigh_2_end:
+SIZE(flint_mpn_mulhigh_2, .flint_mpn_mulhigh_2_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_3.asm b/src/mpn_extras/broadwell/mulhigh_3.asm
new file mode 100644
index 0000000000..826d15b348
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_3.asm
@@ -0,0 +1,65 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_3)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_3)
+
+FUNC(flint_mpn_mulhigh_3):
+ .cfi_startproc
+ push %rbx
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 1*8(%rsi), %r8, %rax
+ mulx 2*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rbx
+ mulx 1*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 2*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rbx
+ adcx %r8, %rax
+ adcx %rbx, %r10
+ mulx 1*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adox %rbx, %r11
+ mulx 2*8(%rsi), %r8, %rbx
+ adcx %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov %r10, 0*8(%rdi)
+ mov %r11, 1*8(%rdi)
+ mov %rbx, 2*8(%rdi)
+
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_3_end:
+SIZE(flint_mpn_mulhigh_3, .flint_mpn_mulhigh_3_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_4.asm b/src/mpn_extras/broadwell/mulhigh_4.asm
new file mode 100644
index 0000000000..746b2fc915
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_4.asm
@@ -0,0 +1,85 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_4)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_4)
+
+FUNC(flint_mpn_mulhigh_4):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 2*8(%rsi), %r8, %rax
+ mulx 3*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %rbx
+ mulx 2*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 3*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rbp
+ mulx 1*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r10
+ mulx 2*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 3*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov 3*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rbp
+ adcx %r8, %rax
+ adcx %rbp, %r10
+ mulx 1*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adox %rbp, %r11
+ mulx 2*8(%rsi), %r8, %rbp
+ adcx %r8, %r11
+ adcx %rbp, %rbx
+ mulx 3*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %r9, %rbp
+ adox %r9, %rbp
+
+ mov %r10, 0*8(%rdi)
+ mov %r11, 1*8(%rdi)
+ mov %rbx, 2*8(%rdi)
+ mov %rbp, 3*8(%rdi)
+
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_4_end:
+SIZE(flint_mpn_mulhigh_4, .flint_mpn_mulhigh_4_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_5.asm b/src/mpn_extras/broadwell/mulhigh_5.asm
new file mode 100644
index 0000000000..c77c9c08aa
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_5.asm
@@ -0,0 +1,108 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_5)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_5)
+
+FUNC(flint_mpn_mulhigh_5):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 3*8(%rsi), %r8, %rax
+ mulx 4*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %rbx
+ mulx 3*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 4*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %rbp
+ mulx 2*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r10
+ mulx 3*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 4*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov 3*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r12
+ mulx 1*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r10
+ mulx 2*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 3*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 4*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %r9, %rbp
+ adox %r9, %rbp
+
+ mov 4*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r12
+ adcx %r8, %rax
+ adcx %r12, %r10
+ mulx 1*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adox %r12, %r11
+ mulx 2*8(%rsi), %r8, %r12
+ adcx %r8, %r11
+ adcx %r12, %rbx
+ mulx 3*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adox %r12, %rbp
+ mulx 4*8(%rsi), %r8, %r12
+ adcx %r8, %rbp
+ adcx %r9, %r12
+ adox %r9, %r12
+
+ mov %r10, 0*8(%rdi)
+ mov %r11, 1*8(%rdi)
+ mov %rbx, 2*8(%rdi)
+ mov %rbp, 3*8(%rdi)
+ mov %r12, 4*8(%rdi)
+
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_5_end:
+SIZE(flint_mpn_mulhigh_5, .flint_mpn_mulhigh_5_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_6.asm b/src/mpn_extras/broadwell/mulhigh_6.asm
new file mode 100644
index 0000000000..686a744d46
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_6.asm
@@ -0,0 +1,134 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_6)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_6)
+
+FUNC(flint_mpn_mulhigh_6):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 4*8(%rsi), %r8, %rax
+ mulx 5*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 3*8(%rsi), %r8, %rbx
+ mulx 4*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 5*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %rbp
+ mulx 3*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r10
+ mulx 4*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 5*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov 3*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %r12
+ mulx 2*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r10
+ mulx 3*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 4*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 5*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %r9, %rbp
+ adox %r9, %rbp
+
+ mov 4*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r13
+ mulx 1*8(%rsi), %r8, %r12
+ adcx %r13, %rax
+ adox %r8, %rax
+ adcx %r12, %r10
+ mulx 2*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adcx %r12, %r11
+ mulx 3*8(%rsi), %r8, %r12
+ adox %r8, %r11
+ adcx %r12, %rbx
+ mulx 4*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adcx %r12, %rbp
+ mulx 5*8(%rsi), %r8, %r12
+ adox %r8, %rbp
+ adcx %r9, %r12
+ adox %r9, %r12
+
+ mov 5*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r13
+ adcx %r8, %rax
+ adcx %r13, %r10
+ mulx 1*8(%rsi), %r8, %r13
+ adox %r8, %r10
+ adox %r13, %r11
+ mulx 2*8(%rsi), %r8, %r13
+ adcx %r8, %r11
+ adcx %r13, %rbx
+ mulx 3*8(%rsi), %r8, %r13
+ adox %r8, %rbx
+ adox %r13, %rbp
+ mulx 4*8(%rsi), %r8, %r13
+ adcx %r8, %rbp
+ adcx %r13, %r12
+ mulx 5*8(%rsi), %r8, %r13
+ adox %r8, %r12
+ adcx %r9, %r13
+ adox %r9, %r13
+
+ mov %r10, 0*8(%rdi)
+ mov %r11, 1*8(%rdi)
+ mov %rbx, 2*8(%rdi)
+ mov %rbp, 3*8(%rdi)
+ mov %r12, 4*8(%rdi)
+ mov %r13, 5*8(%rdi)
+
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_6_end:
+SIZE(flint_mpn_mulhigh_6, .flint_mpn_mulhigh_6_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_7.asm b/src/mpn_extras/broadwell/mulhigh_7.asm
new file mode 100644
index 0000000000..7528aae87b
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_7.asm
@@ -0,0 +1,163 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_7)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_7)
+
+FUNC(flint_mpn_mulhigh_7):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 5*8(%rsi), %r8, %rax
+ mulx 6*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 4*8(%rsi), %r8, %rbx
+ mulx 5*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 6*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 3*8(%rsi), %r8, %rbp
+ mulx 4*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r10
+ mulx 5*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 6*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov 3*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %r12
+ mulx 3*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r10
+ mulx 4*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 5*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 6*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %r9, %rbp
+ adox %r9, %rbp
+
+ mov 4*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %r13
+ mulx 2*8(%rsi), %r8, %r12
+ adcx %r13, %rax
+ adox %r8, %rax
+ adcx %r12, %r10
+ mulx 3*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adcx %r12, %r11
+ mulx 4*8(%rsi), %r8, %r12
+ adox %r8, %r11
+ adcx %r12, %rbx
+ mulx 5*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adcx %r12, %rbp
+ mulx 6*8(%rsi), %r8, %r12
+ adox %r8, %rbp
+ adcx %r9, %r12
+ adox %r9, %r12
+
+ mov 5*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r14
+ mulx 1*8(%rsi), %r8, %r13
+ adcx %r14, %rax
+ adox %r8, %rax
+ adcx %r13, %r10
+ mulx 2*8(%rsi), %r8, %r13
+ adox %r8, %r10
+ adcx %r13, %r11
+ mulx 3*8(%rsi), %r8, %r13
+ adox %r8, %r11
+ adcx %r13, %rbx
+ mulx 4*8(%rsi), %r8, %r13
+ adox %r8, %rbx
+ adcx %r13, %rbp
+ mulx 5*8(%rsi), %r8, %r13
+ adox %r8, %rbp
+ adcx %r13, %r12
+ mulx 6*8(%rsi), %r8, %r13
+ adox %r8, %r12
+ adcx %r9, %r13
+ adox %r9, %r13
+
+ mov 6*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r14
+ adcx %r8, %rax
+ adcx %r14, %r10
+ mulx 1*8(%rsi), %r8, %r14
+ adox %r8, %r10
+ adox %r14, %r11
+ mulx 2*8(%rsi), %r8, %r14
+ adcx %r8, %r11
+ adcx %r14, %rbx
+ mulx 3*8(%rsi), %r8, %r14
+ adox %r8, %rbx
+ adox %r14, %rbp
+ mulx 4*8(%rsi), %r8, %r14
+ adcx %r8, %rbp
+ adcx %r14, %r12
+ mulx 5*8(%rsi), %r8, %r14
+ adox %r8, %r12
+ adox %r14, %r13
+ mulx 6*8(%rsi), %r8, %r14
+ adcx %r8, %r13
+ adcx %r9, %r14
+ adox %r9, %r14
+
+ mov %r10, 0*8(%rdi)
+ mov %r11, 1*8(%rdi)
+ mov %rbx, 2*8(%rdi)
+ mov %rbp, 3*8(%rdi)
+ mov %r12, 4*8(%rdi)
+ mov %r13, 5*8(%rdi)
+ mov %r14, 6*8(%rdi)
+
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_7_end:
+SIZE(flint_mpn_mulhigh_7, .flint_mpn_mulhigh_7_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_8.asm b/src/mpn_extras/broadwell/mulhigh_8.asm
new file mode 100644
index 0000000000..2c2983fe34
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_8.asm
@@ -0,0 +1,195 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_8)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_8)
+
+FUNC(flint_mpn_mulhigh_8):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 6*8(%rsi), %r8, %rax
+ mulx 7*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 5*8(%rsi), %r8, %rbx
+ mulx 6*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 7*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 4*8(%rsi), %r8, %rbp
+ mulx 5*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r10
+ mulx 6*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 7*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov 3*8(%rcx), %rdx
+ mulx 3*8(%rsi), %r8, %r12
+ mulx 4*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r10
+ mulx 5*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 6*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 7*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %r9, %rbp
+ adox %r9, %rbp
+
+ mov 4*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %r13
+ mulx 3*8(%rsi), %r8, %r12
+ adcx %r13, %rax
+ adox %r8, %rax
+ adcx %r12, %r10
+ mulx 4*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adcx %r12, %r11
+ mulx 5*8(%rsi), %r8, %r12
+ adox %r8, %r11
+ adcx %r12, %rbx
+ mulx 6*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adcx %r12, %rbp
+ mulx 7*8(%rsi), %r8, %r12
+ adox %r8, %rbp
+ adcx %r9, %r12
+ adox %r9, %r12
+
+ mov 5*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %r14
+ mulx 2*8(%rsi), %r8, %r13
+ adcx %r14, %rax
+ adox %r8, %rax
+ adcx %r13, %r10
+ mulx 3*8(%rsi), %r8, %r13
+ adox %r8, %r10
+ adcx %r13, %r11
+ mulx 4*8(%rsi), %r8, %r13
+ adox %r8, %r11
+ adcx %r13, %rbx
+ mulx 5*8(%rsi), %r8, %r13
+ adox %r8, %rbx
+ adcx %r13, %rbp
+ mulx 6*8(%rsi), %r8, %r13
+ adox %r8, %rbp
+ adcx %r13, %r12
+ mulx 7*8(%rsi), %r8, %r13
+ adox %r8, %r12
+ adcx %r9, %r13
+ adox %r9, %r13
+
+ mov 6*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r15
+ mulx 1*8(%rsi), %r8, %r14
+ adcx %r15, %rax
+ adox %r8, %rax
+ adcx %r14, %r10
+ mulx 2*8(%rsi), %r8, %r14
+ adox %r8, %r10
+ adcx %r14, %r11
+ mulx 3*8(%rsi), %r8, %r14
+ adox %r8, %r11
+ adcx %r14, %rbx
+ mulx 4*8(%rsi), %r8, %r14
+ adox %r8, %rbx
+ adcx %r14, %rbp
+ mulx 5*8(%rsi), %r8, %r14
+ adox %r8, %rbp
+ adcx %r14, %r12
+ mulx 6*8(%rsi), %r8, %r14
+ adox %r8, %r12
+ adcx %r14, %r13
+ mulx 7*8(%rsi), %r8, %r14
+ adox %r8, %r13
+ adcx %r9, %r14
+ adox %r9, %r14
+
+ mov 7*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %r15
+ adcx %r8, %rax
+ adcx %r15, %r10
+ mulx 1*8(%rsi), %r8, %r15
+ adox %r8, %r10
+ adox %r15, %r11
+ mulx 2*8(%rsi), %r8, %r15
+ adcx %r8, %r11
+ adcx %r15, %rbx
+ mulx 3*8(%rsi), %r8, %r15
+ adox %r8, %rbx
+ adox %r15, %rbp
+ mulx 4*8(%rsi), %r8, %r15
+ adcx %r8, %rbp
+ adcx %r15, %r12
+ mulx 5*8(%rsi), %r8, %r15
+ adox %r8, %r12
+ adox %r15, %r13
+ mulx 6*8(%rsi), %r8, %r15
+ adcx %r8, %r13
+ adcx %r15, %r14
+ mulx 7*8(%rsi), %r8, %r15
+ adox %r8, %r14
+ adcx %r9, %r15
+ adox %r9, %r15
+
+ mov %r10, 0*8(%rdi)
+ mov %r11, 1*8(%rdi)
+ mov %rbx, 2*8(%rdi)
+ mov %rbp, 3*8(%rdi)
+ mov %r12, 4*8(%rdi)
+ mov %r13, 5*8(%rdi)
+ mov %r14, 6*8(%rdi)
+ mov %r15, 7*8(%rdi)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_8_end:
+SIZE(flint_mpn_mulhigh_8, .flint_mpn_mulhigh_8_end)
+.cfi_endproc
diff --git a/src/mpn_extras/broadwell/mulhigh_9.asm b/src/mpn_extras/broadwell/mulhigh_9.asm
new file mode 100644
index 0000000000..ba4438eb0b
--- /dev/null
+++ b/src/mpn_extras/broadwell/mulhigh_9.asm
@@ -0,0 +1,230 @@
+#
+# Copyright (C) 2024 Albin Ahlbäck
+#
+# This file is part of FLINT.
+#
+# FLINT is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License (LGPL) as published
+# by the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version. See .
+#
+
+include(`config.m4')dnl
+dnl
+.text
+
+.global FUNC(flint_mpn_mulhigh_9)
+.p2align 4, 0x90
+TYPE(flint_mpn_mulhigh_9)
+
+FUNC(flint_mpn_mulhigh_9):
+ .cfi_startproc
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rdi
+
+ mov %rdx, %rcx
+ mov 0*8(%rdx), %rdx
+ xor %r9d, %r9d
+
+ mulx 7*8(%rsi), %r8, %rax
+ mulx 8*8(%rsi), %r8, %r10
+ adcx %r8, %rax
+ adcx %r9, %r10
+
+ mov 1*8(%rcx), %rdx
+ mulx 6*8(%rsi), %r8, %rbx
+ mulx 7*8(%rsi), %r8, %r11
+ adcx %rbx, %rax
+ adox %r8, %rax
+ adcx %r11, %r10
+ mulx 8*8(%rsi), %r8, %r11
+ adox %r8, %r10
+ adcx %r9, %r11
+ adox %r9, %r11
+
+ mov 2*8(%rcx), %rdx
+ mulx 5*8(%rsi), %r8, %rbp
+ mulx 6*8(%rsi), %r8, %rbx
+ adcx %rbp, %rax
+ adox %r8, %rax
+ adcx %rbx, %r10
+ mulx 7*8(%rsi), %r8, %rbx
+ adox %r8, %r10
+ adcx %rbx, %r11
+ mulx 8*8(%rsi), %r8, %rbx
+ adox %r8, %r11
+ adcx %r9, %rbx
+ adox %r9, %rbx
+
+ mov 3*8(%rcx), %rdx
+ mulx 4*8(%rsi), %r8, %r12
+ mulx 5*8(%rsi), %r8, %rbp
+ adcx %r12, %rax
+ adox %r8, %rax
+ adcx %rbp, %r10
+ mulx 6*8(%rsi), %r8, %rbp
+ adox %r8, %r10
+ adcx %rbp, %r11
+ mulx 7*8(%rsi), %r8, %rbp
+ adox %r8, %r11
+ adcx %rbp, %rbx
+ mulx 8*8(%rsi), %r8, %rbp
+ adox %r8, %rbx
+ adcx %r9, %rbp
+ adox %r9, %rbp
+
+ mov 4*8(%rcx), %rdx
+ mulx 3*8(%rsi), %r8, %r13
+ mulx 4*8(%rsi), %r8, %r12
+ adcx %r13, %rax
+ adox %r8, %rax
+ adcx %r12, %r10
+ mulx 5*8(%rsi), %r8, %r12
+ adox %r8, %r10
+ adcx %r12, %r11
+ mulx 6*8(%rsi), %r8, %r12
+ adox %r8, %r11
+ adcx %r12, %rbx
+ mulx 7*8(%rsi), %r8, %r12
+ adox %r8, %rbx
+ adcx %r12, %rbp
+ mulx 8*8(%rsi), %r8, %r12
+ adox %r8, %rbp
+ adcx %r9, %r12
+ adox %r9, %r12
+
+ mov 5*8(%rcx), %rdx
+ mulx 2*8(%rsi), %r8, %r14
+ mulx 3*8(%rsi), %r8, %r13
+ adcx %r14, %rax
+ adox %r8, %rax
+ adcx %r13, %r10
+ mulx 4*8(%rsi), %r8, %r13
+ adox %r8, %r10
+ adcx %r13, %r11
+ mulx 5*8(%rsi), %r8, %r13
+ adox %r8, %r11
+ adcx %r13, %rbx
+ mulx 6*8(%rsi), %r8, %r13
+ adox %r8, %rbx
+ adcx %r13, %rbp
+ mulx 7*8(%rsi), %r8, %r13
+ adox %r8, %rbp
+ adcx %r13, %r12
+ mulx 8*8(%rsi), %r8, %r13
+ adox %r8, %r12
+ adcx %r9, %r13
+ adox %r9, %r13
+
+ mov 6*8(%rcx), %rdx
+ mulx 1*8(%rsi), %r8, %r15
+ mulx 2*8(%rsi), %r8, %r14
+ adcx %r15, %rax
+ adox %r8, %rax
+ adcx %r14, %r10
+ mulx 3*8(%rsi), %r8, %r14
+ adox %r8, %r10
+ adcx %r14, %r11
+ mulx 4*8(%rsi), %r8, %r14
+ adox %r8, %r11
+ adcx %r14, %rbx
+ mulx 5*8(%rsi), %r8, %r14
+ adox %r8, %rbx
+ adcx %r14, %rbp
+ mulx 6*8(%rsi), %r8, %r14
+ adox %r8, %rbp
+ adcx %r14, %r12
+ mulx 7*8(%rsi), %r8, %r14
+ adox %r8, %r12
+ adcx %r14, %r13
+ mulx 8*8(%rsi), %r8, %r14
+ adox %r8, %r13
+ adcx %r9, %r14
+ adox %r9, %r14
+
+ mov 7*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rdi
+ mulx 1*8(%rsi), %r8, %r15
+ adcx %rdi, %rax
+ adox %r8, %rax
+ adcx %r15, %r10
+ mulx 2*8(%rsi), %r8, %r15
+ adox %r8, %r10
+ adcx %r15, %r11
+ mulx 3*8(%rsi), %r8, %r15
+ adox %r8, %r11
+ adcx %r15, %rbx
+ mulx 4*8(%rsi), %r8, %r15
+ adox %r8, %rbx
+ adcx %r15, %rbp
+ mulx 5*8(%rsi), %r8, %r15
+ adox %r8, %rbp
+ adcx %r15, %r12
+ mulx 6*8(%rsi), %r8, %r15
+ adox %r8, %r12
+ adcx %r15, %r13
+ mulx 7*8(%rsi), %r8, %r15
+ adox %r8, %r13
+ adcx %r15, %r14
+ mulx 8*8(%rsi), %r8, %r15
+ adox %r8, %r14
+ adcx %r9, %r15
+ adox %r9, %r15
+
+ mov 8*8(%rcx), %rdx
+ mulx 0*8(%rsi), %r8, %rdi
+ adcx %r8, %rax
+ adcx %rdi, %r10
+ mulx 1*8(%rsi), %r8, %rdi
+ adox %r8, %r10
+ adox %rdi, %r11
+ mulx 2*8(%rsi), %r8, %rdi
+ adcx %r8, %r11
+ adcx %rdi, %rbx
+ mulx 3*8(%rsi), %r8, %rdi
+ adox %r8, %rbx
+ adox %rdi, %rbp
+ mulx 4*8(%rsi), %r8, %rdi
+ adcx %r8, %rbp
+ adcx %rdi, %r12
+ mulx 5*8(%rsi), %r8, %rdi
+ adox %r8, %r12
+ adox %rdi, %r13
+ mulx 6*8(%rsi), %r8, %rdi
+ adcx %r8, %r13
+ adcx %rdi, %r14
+ mulx 7*8(%rsi), %r8, %rdi
+ adox %r8, %r14
+ adox %rdi, %r15
+ mulx 8*8(%rsi), %r8, %rdi
+ adcx %r8, %r15
+ adcx %r9, %rdi
+ pop %r8
+ adox %r9, %rdi
+
+ mov %r10, 0*8(%r8)
+ mov %r11, 1*8(%r8)
+ mov %rbx, 2*8(%r8)
+ mov %rbp, 3*8(%r8)
+ mov %r12, 4*8(%r8)
+ mov %r13, 5*8(%r8)
+ mov %r14, 6*8(%r8)
+ mov %r15, 7*8(%r8)
+ mov %rdi, 8*8(%r8)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+.flint_mpn_mulhigh_9_end:
+SIZE(flint_mpn_mulhigh_9, .flint_mpn_mulhigh_9_end)
+.cfi_endproc
diff --git a/src/mpn_extras/mulhigh.c b/src/mpn_extras/mulhigh.c
new file mode 100644
index 0000000000..57365b5cfd
--- /dev/null
+++ b/src/mpn_extras/mulhigh.c
@@ -0,0 +1,45 @@
+/*
+ Copyright (C) 2024 Albin Ahlbäck
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "mpn_extras.h"
+
+#if FLINT_HAVE_ADX
+mp_limb_t flint_mpn_mulhigh_1(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_2(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_3(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_4(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_5(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_6(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_7(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_8(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_9(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_10(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_11(mp_ptr, mp_srcptr, mp_srcptr);
+mp_limb_t flint_mpn_mulhigh_12(mp_ptr, mp_srcptr, mp_srcptr);
+
+const flint_mpn_mul_func_t flint_mpn_mulhigh_n_func_tab[] =
+{
+ flint_mpn_mulhigh_1,
+ flint_mpn_mulhigh_2,
+ flint_mpn_mulhigh_3,
+ flint_mpn_mulhigh_4,
+ flint_mpn_mulhigh_5,
+ flint_mpn_mulhigh_6,
+ flint_mpn_mulhigh_7,
+ flint_mpn_mulhigh_8,
+ flint_mpn_mulhigh_9,
+ flint_mpn_mulhigh_10,
+ flint_mpn_mulhigh_11,
+ flint_mpn_mulhigh_12
+};
+#else
+typedef int this_file_is_empty;
+#endif
diff --git a/src/mpn_extras/profile/p-mulhigh_basecase.c b/src/mpn_extras/profile/p-mulhigh_basecase.c
new file mode 100644
index 0000000000..e26068f1f7
--- /dev/null
+++ b/src/mpn_extras/profile/p-mulhigh_basecase.c
@@ -0,0 +1,57 @@
+/*
+ Copyright (C) 2024 Albin Ahlbäck
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "mpn_extras.h"
+#include "profiler.h"
+
+#define mpn_mul_basecase __gmpn_mul_basecase
+void mpn_mul_basecase(mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void mpfr_mulhigh_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define N_MAX FLINT_MPN_MULHIGH_N_FUNC_TAB_WIDTH
+
+int main(void)
+{
+ mp_limb_t rf[N_MAX];
+ mp_limb_t rg[2 * N_MAX];
+ mp_limb_t rm[2 * N_MAX];
+ mp_limb_t xp[N_MAX];
+ mp_limb_t yp[N_MAX];
+ mp_size_t n;
+
+ flint_printf(" GMP MPFR\n");
+ for (n = 1; n <= N_MAX; n++)
+ {
+ double t1, t2, t3, __attribute__((unused)) __;
+ flint_printf("n = %2wd:", n);
+
+ mpn_random2(xp, n);
+ mpn_random2(yp, n);
+
+ TIMEIT_START
+ flint_mpn_mulhigh_n(rf, xp, yp, n);
+ TIMEIT_STOP_VALUES(__, t1)
+
+ TIMEIT_START
+ mpn_mul_basecase(rg, xp, n, yp, n);
+ TIMEIT_STOP_VALUES(__, t2)
+
+ TIMEIT_START
+ mpfr_mulhigh_n(rm, xp, yp, n);
+ TIMEIT_STOP_VALUES(__, t3)
+
+ flint_printf("%7.2fx %7.2fx\n", t2 / t1, t3 / t1);
+ }
+
+ flint_cleanup_master();
+
+ return 0;
+}
diff --git a/src/mpn_extras/test/main.c b/src/mpn_extras/test/main.c
index 7cb72d0a28..fb7a0307d4 100644
--- a/src/mpn_extras/test/main.c
+++ b/src/mpn_extras/test/main.c
@@ -23,6 +23,7 @@
#include "t-mul.c"
#include "t-mul_n.c"
#include "t-mul_basecase.c"
+#include "t-mulhigh_n.c"
#include "t-mulmod_2expp1.c"
#include "t-mulmod_preinv1.c"
#include "t-mulmod_preinvn.c"
@@ -43,6 +44,7 @@ test_struct tests[] =
TEST_FUNCTION(flint_mpn_mul),
TEST_FUNCTION(flint_mpn_mul_n),
TEST_FUNCTION(flint_mpn_mul_basecase),
+ TEST_FUNCTION(flint_mpn_mulhigh_n),
TEST_FUNCTION(flint_mpn_mulmod_2expp1),
TEST_FUNCTION(flint_mpn_mulmod_preinv1),
TEST_FUNCTION(flint_mpn_mulmod_preinvn),
diff --git a/src/mpn_extras/test/t-mulhigh_n.c b/src/mpn_extras/test/t-mulhigh_n.c
new file mode 100644
index 0000000000..7484079a9a
--- /dev/null
+++ b/src/mpn_extras/test/t-mulhigh_n.c
@@ -0,0 +1,119 @@
+/*
+ Copyright (C) 2024 Albin Ahlbäck
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include
+#include "test_helpers.h"
+#include "mpn_extras.h"
+
+#if FLINT_MPN_MULHIGH_N_FUNC_TAB_WIDTH != 0
+
+# define TEST_MPFR 0
+# define N_MAX FLINT_MPN_MULHIGH_N_FUNC_TAB_WIDTH
+
+void mpfr_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n);
+
+TEST_FUNCTION_START(flint_mpn_mulhigh_n, state)
+{
+ slong ix;
+ int result;
+
+ for (ix = 0; ix < 100000 * flint_test_multiplier(); ix++)
+ {
+ mp_limb_t rp[N_MAX + 1] = {UWORD(0)};
+ mp_limb_t rp_upperbound[2 * N_MAX] = {UWORD(0)};
+ mp_limb_t rp_lowerbound[2 * N_MAX] = {UWORD(0)};
+#if TEST_MPFR
+ mp_limb_t rp_mpfr[2 * N_MAX] = {UWORD(0)};
+#endif
+ mp_limb_t borrow;
+ mp_limb_t xp[N_MAX];
+ mp_limb_t yp[N_MAX];
+ mp_size_t n;
+ mp_limb_t lb;
+
+ n = 1 + n_randint(state, N_MAX);
+
+ mpn_random2(xp, n);
+ mpn_random2(yp, n);
+
+ rp[0] = flint_mpn_mulhigh_n(rp + 1, xp, yp, n);
+
+ /* Check upper bound */
+ flint_mpn_mul_n(rp_upperbound, xp, yp, n);
+
+ result = (mpn_cmp(rp, rp_upperbound + n - 1, n + 1) <= 0);
+ if (!result)
+ TEST_FUNCTION_FAIL(
+ "rp > rp_upperbound\n"
+ "ix = %wd\n"
+ "n = %wd\n"
+ "xp = %{ulong*}\n"
+ "yp = %{ulong*}\n"
+ "rp = %{ulong*}\n"
+ "rp_upperbound = %{ulong*}\n",
+ ix, n, xp, n, yp, n, rp, n + 1, rp_upperbound + n - 1, n + 1);
+
+ /* Check lower bound */
+ memcpy(rp_lowerbound, rp_upperbound, 2 * n * sizeof(mp_limb_t));
+
+ if (n <= 2)
+ lb = 0; /* Multiplication should be "exact" */
+ else if (n == 3)
+ lb = 2;
+ else if (n == 4)
+ lb = 4;
+ else if (n < 6)
+ lb = n + 1;
+ else
+ lb = n + 2;
+
+ borrow = mpn_sub_1(rp_lowerbound + n - 1, rp_lowerbound + n - 1, n + 1, lb);
+ if (borrow)
+ mpn_zero(rp_lowerbound, 2 * n);
+
+ result = (mpn_cmp(rp, rp_lowerbound + n - 1, n + 1) >= 0);
+ if (!result)
+ TEST_FUNCTION_FAIL(
+ "rp < rp_lowerbound\n"
+ "ix = %wd\n"
+ "n = %wd\n"
+ "xp = %{ulong*}\n"
+ "yp = %{ulong*}\n"
+ "rp = %{ulong*}\n"
+ "rp_lowerbound = %{ulong*}\n",
+ ix, n, xp, n, yp, n, rp, n + 1, rp_lowerbound + n - 1, n + 1);
+
+#if TEST_MPFR
+ /* Check against MPFR */
+ mpfr_mulhigh_n(rp_mpfr, xp, yp, n);
+
+ result = (mpn_cmp(rp + n - 1, rp_mpfr + n - 1, n + 1) == 0);
+ if (!result)
+ TEST_FUNCTION_FAIL(
+ "flint_mpn_mulhigh_n does not agree with mpfr_mulhigh_n\n"
+ "ix = %wd\n"
+ "n = %wd\n"
+ "xp = %{ulong*}\n"
+ "yp = %{ulong*}\n"
+ "rp = %{ulong*}\n"
+ "rp_mpfr = %{ulong*}\n",
+ ix, n, xp, n, yp, n, rp + n - 1, n + 1, rp_mpfr + n - 1, n + 1);
+#endif
+ }
+
+ TEST_FUNCTION_END(state);
+}
+#else
+TEST_FUNCTION_START(flint_mpn_mulhigh_n, state)
+{
+ TEST_FUNCTION_END_SKIPPED(state);
+}
+#endif