Skip to content

Commit

Permalink
improve row permutations in nmod_mat_lu_recursive
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrik-johansson committed Jan 16, 2025
1 parent 3824177 commit b239de1
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 13 deletions.
43 changes: 32 additions & 11 deletions src/nmod_mat/lu_recursive.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include "nmod_vec.h"
#include "nmod_mat.h"

/* Permute rows SLICE columns at a time to improve memory locality
and reduce temporary allocation: the scratch buffer used while
permuting holds at most num_rows * SLICE entries instead of
num_rows * num_cols. Blocks with num_cols <= SLICE are copied in
a single pass. */
#define SLICE 32

static void
_apply_permutation(slong * AP, nmod_mat_t A, const slong * P,
slong num_rows, slong row_offset, slong num_cols, slong col_offset)
Expand All @@ -20,23 +24,40 @@ _apply_permutation(slong * AP, nmod_mat_t A, const slong * P,
{
ulong * Atmp;
slong * APtmp;
slong i;
slong i, c, l;
TMP_INIT;

TMP_START;

if (num_cols <= SLICE)
{
Atmp = TMP_ALLOC(sizeof(ulong) * num_rows * num_cols);

for (i = 0; i < num_rows; i++)
_nmod_vec_set(Atmp + i * num_cols, nmod_mat_entry_ptr(A, P[i] + row_offset, col_offset), num_cols);
for (i = 0; i < num_rows; i++)
_nmod_vec_set(nmod_mat_entry_ptr(A, i + row_offset, col_offset), Atmp + i * num_cols, num_cols);
}
else
{
Atmp = TMP_ALLOC(sizeof(ulong) * num_rows * SLICE);

/* todo: reduce memory allocation */
Atmp = flint_malloc(sizeof(ulong) * num_rows * num_cols);
/* todo: avoid temporary allocation when AP != P */
APtmp = flint_malloc(sizeof(slong) * num_rows);
for (c = 0; c < num_cols; c += SLICE)
{
l = FLINT_MIN(SLICE, num_cols - c);
for (i = 0; i < num_rows; i++)
_nmod_vec_set(Atmp + i * SLICE, nmod_mat_entry_ptr(A, P[i] + row_offset, col_offset + c), l);
for (i = 0; i < num_rows; i++)
_nmod_vec_set(nmod_mat_entry_ptr(A, i + row_offset, col_offset + c), Atmp + i * SLICE, l);
}
}

for (i = 0; i < num_rows; i++)
_nmod_vec_set(Atmp + i * num_cols, nmod_mat_entry_ptr(A, P[i] + row_offset, col_offset), num_cols);
for (i = 0; i < num_rows; i++)
_nmod_vec_set(nmod_mat_entry_ptr(A, i + row_offset, col_offset), Atmp + i * num_cols, num_cols);
APtmp = TMP_ALLOC(sizeof(slong) * num_rows);

for (i = 0; i < num_rows; i++) APtmp[i] = AP[P[i] + row_offset];
for (i = 0; i < num_rows; i++) AP[i + row_offset] = APtmp[i];

flint_free(Atmp);
flint_free(APtmp);
TMP_END;
}
}

Expand Down
13 changes: 11 additions & 2 deletions src/nmod_mat/test/t-lu_recursive.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,17 @@ TEST_FUNCTION_START(nmod_mat_lu_recursive, state)
slong m, n, r, d, rank;
slong * P;

m = n_randint(state, 30);
n = n_randint(state, 30);
if (n_randint(state, 100) == 0)
{
m = n_randint(state, 100);
n = n_randint(state, 100);
}
else
{
m = n_randint(state, 30);
n = n_randint(state, 30);
}

mod = n_randtest_prime(state, 0);

for (r = 0; r <= FLINT_MIN(m, n); r++)
Expand Down

0 comments on commit b239de1

Please sign in to comment.