diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst
index 763d129f9a..d708a16c6a 100644
--- a/doc/source/nfloat.rst
+++ b/doc/source/nfloat.rst
@@ -317,11 +317,13 @@ code for reduced overhead.
 Matrix functions
 -------------------------------------------------------------------------------
 
-.. function:: int _nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
-              int _nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+.. function:: int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+              int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
               int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx)
               int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
 
+    Different implementations of matrix multiplication, each setting *C* to the product of *A* and *B*.
+
 Internal functions
 -------------------------------------------------------------------------------
 
diff --git a/src/nfloat.h b/src/nfloat.h
index 4a5d61497a..78b764a213 100644
--- a/src/nfloat.h
+++ b/src/nfloat.h
@@ -453,8 +453,8 @@ int _nfloat_vec_submul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat
 int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
 int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx);
 
-int _nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
-int _nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx);
 int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 
diff --git a/src/nfloat/mat_mul.c b/src/nfloat/mat_mul.c
index b943a4c33d..57253ed8ed 100644
--- a/src/nfloat/mat_mul.c
+++ b/src/nfloat/mat_mul.c
@@ -396,7 +396,7 @@ _nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksma
 
     extra_bits = Adelta + Bdelta + pad_top + pad_bot;
 
-    if (extra_bits > max_extra_bits)
+    if (extra_bits >= max_extra_bits)
         return gr_mat_mul_classical(C, A, B, ctx);
 
     Aexp = Amax + pad_top;
@@ -408,13 +408,13 @@ _nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksma
 }
 
 int
-_nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
 {
     return _nfloat_mat_mul_fixed(C, A, B, 0, 100000, ctx);
 }
 
 int
-_nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
 {
     return _nfloat_mat_mul_fixed(C, A, B, 1, 100000, ctx);
 }
diff --git a/src/nfloat/test/t-mat_mul.c b/src/nfloat/test/t-mat_mul.c
index abaab56e65..dad6ad625c 100644
--- a/src/nfloat/test/t-mat_mul.c
+++ b/src/nfloat/test/t-mat_mul.c
@@ -41,7 +41,7 @@ TEST_FUNCTION_START(mat_mul, state)
         GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 2, ctx));
 
         gr_mat_test_approx_mul_max_norm(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_waksman,
+            (gr_method_mat_binary_op) nfloat_mat_mul_waksman,
             tol, state, (prec <= 256) ? 10 : 1, 10, ctx);
 
         gr_mat_test_approx_mul_max_norm(
@@ -50,7 +50,7 @@ TEST_FUNCTION_START(mat_mul, state)
                         (prec <= 256) ? 40 : 20, ctx);
 
         gr_mat_test_approx_mul_max_norm(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_fixed_classical,
+            (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical,
             tol, state, (prec <= 256) ? 10 : 1,
                         (prec <= 256) ? 40 : 20, ctx);
 
@@ -77,7 +77,7 @@ TEST_FUNCTION_START(mat_mul, state)
         GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 6, ctx));
 
         gr_mat_test_approx_mul_pos_entrywise_accurate(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_waksman,
+            (gr_method_mat_binary_op) nfloat_mat_mul_waksman,
             tol, state, (prec <= 256) ? 10 : 1, 10, ctx);
 
         gr_mat_test_approx_mul_pos_entrywise_accurate(
@@ -86,7 +86,7 @@ TEST_FUNCTION_START(mat_mul, state)
                         (prec <= 256) ? 40 : 20, ctx);
 
         gr_mat_test_approx_mul_pos_entrywise_accurate(
-            (gr_method_mat_binary_op) _nfloat_mat_mul_fixed_classical,
+            (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical,
             tol, state, (prec <= 256) ? 10 : 1,
                         (prec <= 256) ? 40 : 20, ctx);