diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index 763d129f9a..d708a16c6a 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -317,11 +317,13 @@ code for reduced overhead. Matrix functions ------------------------------------------------------------------------------- -.. function:: int _nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) - int _nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +.. function:: int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) + int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx) int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) + Different implementations of matrix multiplication. + Internal functions ------------------------------------------------------------------------------- diff --git a/src/nfloat.h b/src/nfloat.h index 4a5d61497a..78b764a213 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -453,8 +453,8 @@ int _nfloat_vec_submul_scalar(nfloat_ptr res, nfloat_srcptr x, slong len, nfloat int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); -int _nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); -int _nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); +int nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); +int nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); int nfloat_mat_mul_block(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, slong min_block_size, gr_ctx_t ctx); int nfloat_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx); diff --git a/src/nfloat/mat_mul.c b/src/nfloat/mat_mul.c index b943a4c33d..57253ed8ed 100644 --- a/src/nfloat/mat_mul.c +++ b/src/nfloat/mat_mul.c @@ -396,7 +396,7 @@ _nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksma extra_bits = Adelta + Bdelta + pad_top + pad_bot; - if (extra_bits > max_extra_bits) + if (extra_bits >= max_extra_bits) return gr_mat_mul_classical(C, A, B, ctx); Aexp = Amax + pad_top; @@ -408,13 +408,13 @@ _nfloat_mat_mul_fixed(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, int waksma } int -_nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +nfloat_mat_mul_fixed_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) { return _nfloat_mat_mul_fixed(C, A, B, 0, 100000, ctx); } int -_nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) +nfloat_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx) { return _nfloat_mat_mul_fixed(C, A, B, 1, 100000, ctx); } diff --git a/src/nfloat/test/t-mat_mul.c b/src/nfloat/test/t-mat_mul.c index abaab56e65..dad6ad625c 100644 --- a/src/nfloat/test/t-mat_mul.c +++ b/src/nfloat/test/t-mat_mul.c @@ -41,7 +41,7 @@ TEST_FUNCTION_START(mat_mul, state) GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 2, ctx)); gr_mat_test_approx_mul_max_norm( - (gr_method_mat_binary_op) _nfloat_mat_mul_waksman, + (gr_method_mat_binary_op) nfloat_mat_mul_waksman, tol, state, (prec <= 256) ? 10 : 1, 10, ctx); gr_mat_test_approx_mul_max_norm( @@ -50,7 +50,7 @@ TEST_FUNCTION_START(mat_mul, state) (prec <= 256) ? 40 : 20, ctx); gr_mat_test_approx_mul_max_norm( - (gr_method_mat_binary_op) _nfloat_mat_mul_fixed_classical, + (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical, tol, state, (prec <= 256) ? 10 : 1, (prec <= 256) ? 40 : 20, ctx); @@ -77,7 +77,7 @@ TEST_FUNCTION_START(mat_mul, state) GR_MUST_SUCCEED(gr_mul_2exp_si(tol, tol, -prec + 6, ctx)); gr_mat_test_approx_mul_pos_entrywise_accurate( - (gr_method_mat_binary_op) _nfloat_mat_mul_waksman, + (gr_method_mat_binary_op) nfloat_mat_mul_waksman, tol, state, (prec <= 256) ? 10 : 1, 10, ctx); gr_mat_test_approx_mul_pos_entrywise_accurate( @@ -86,7 +86,7 @@ TEST_FUNCTION_START(mat_mul, state) (prec <= 256) ? 40 : 20, ctx); gr_mat_test_approx_mul_pos_entrywise_accurate( - (gr_method_mat_binary_op) _nfloat_mat_mul_fixed_classical, + (gr_method_mat_binary_op) nfloat_mat_mul_fixed_classical, tol, state, (prec <= 256) ? 10 : 1, (prec <= 256) ? 40 : 20, ctx);