diff --git a/examples/python/tutorial-3-basis.ipynb b/examples/python/tutorial-3-basis.ipynb index 02b4878285..a2141e4e9d 100644 --- a/examples/python/tutorial-3-basis.ipynb +++ b/examples/python/tutorial-3-basis.ipynb @@ -312,111 +312,6 @@ " # Check that (1' * G * u - u' * (G' * 1)) is numerically zero\n", " print('1T * G * u - uT * (GT * 1) =', np.abs(sum1 - sum2))" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Advanced topics" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* In the following example, we demonstrate the QR factorization of a basis matrix.\n", - "The representation is similar to LAPACK's [`dgeqrf`](https://www.netlib.org/lapack/explore-html/dd/d9a/group__double_g_ecomputational_ga3766ea903391b5cf9008132f7440ec7b.html#ga3766ea903391b5cf9008132f7440ec7b), with elementary reflectors in the lower triangular block, scaled by `tau`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qr = np.array([1, -1, 4, 1, 4, -2, 1, 4, 2, 1, -1, 0], dtype=\"float64\")\n", - "tau = np.empty(3, dtype=\"float64\")\n", - "\n", - "qr, tau = ceed.qr_factorization(qr, tau, 4, 3)\n", - "\n", - "print('qr =')\n", - "print(qr.reshape(4, 3))\n", - "\n", - "print('tau =')\n", - "print(tau)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* In the following example, we demonstrate the symmetric Schur decomposition of a basis matrix" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "A = np.array([0.19996678, 0.0745459, -0.07448852, 0.0332866,\n", - " 0.0745459, 1., 0.16666509, -0.07448852,\n", - " -0.07448852, 0.16666509, 1., 0.0745459,\n", - " 0.0332866, -0.07448852, 0.0745459, 0.19996678], dtype=\"float64\")\n", - "\n", - "lam = ceed.symmetric_schur_decomposition(A, 4)\n", - "\n", - "print(\"Q =\")\n", - "for i in range(4):\n", - " for j in range(4):\n", - " if A[j+4*i] <= 1E-14 and A[j+4*i] >= -1E-14:\n", - " A[j+4*i] = 0\n", - " print(\"%12.8f\"%A[j+4*i])\n", - "\n", - "print(\"lambda =\")\n", - "for i in range(4):\n", - " if lam[i] <= 1E-14 and lam[i] >= -1E-14:\n", - " lam[i] = 0\n", - " print(\"%12.8f\"%lam[i])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* In the following example, we demonstrate the simultaneous diagonalization of a basis matrix" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "M = np.array([0.19996678, 0.0745459, -0.07448852, 0.0332866,\n", - " 0.0745459, 1., 0.16666509, -0.07448852,\n", - " -0.07448852, 0.16666509, 1., 0.0745459,\n", - " 0.0332866, -0.07448852, 0.0745459, 0.19996678], dtype=\"float64\")\n", - "K = np.array([3.03344425, -3.41501767, 0.49824435, -0.11667092,\n", - " -3.41501767, 5.83354662, -2.9167733, 0.49824435,\n", - " 0.49824435, -2.9167733, 5.83354662, -3.41501767,\n", - " -0.11667092, 0.49824435, -3.41501767, 3.03344425], dtype=\"float64\")\n", - "\n", - "x, lam = ceed.simultaneous_diagonalization(K, M, 4)\n", - "\n", - "print(\"x =\")\n", - "for i in range(4):\n", - " for j in range(4):\n", - " if x[j+4*i] <= 1E-14 and x[j+4*i] >= -1E-14:\n", - " x[j+4*i] = 0\n", - " print(\"%12.8f\"%x[j+4*i])\n", - "\n", - "print(\"lambda =\")\n", - "for i in range(4):\n", - " if lam[i] <= 1E-14 and lam[i] >= -1E-14:\n", - " lam[i] = 0\n", - " print(\"%12.8f\"%lam[i])" - ] } ], "metadata": { diff --git a/include/ceed/backend.h b/include/ceed/backend.h index a3789a64f9..f040f6f3db 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -181,8 +181,6 @@ typedef enum { CEED_EXTERN const char *const CeedFESpaces[]; CEED_EXTERN int CeedBasisGetCollocatedGrad(CeedBasis basis, CeedScalar *colo_grad_1d); -CEED_EXTERN int CeedHouseholderApplyQ(CeedScalar *A, const CeedScalar *Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, CeedInt n, - CeedInt k, CeedInt row, CeedInt col); CEED_EXTERN int CeedBasisIsTensor(CeedBasis basis, bool *is_tensor); CEED_EXTERN int CeedBasisGetData(CeedBasis basis, void *data); CEED_EXTERN int CeedBasisSetData(CeedBasis basis, void *data); @@ -282,5 +280,10 @@ CEED_EXTERN int CeedOperatorSetSetupDone(CeedOperator op); CEED_INTERN int CeedMatrixMatrixMultiply(Ceed ceed, const CeedScalar *mat_A, const CeedScalar *mat_B, CeedScalar *mat_C, CeedInt m, CeedInt n, CeedInt kk); +CEED_EXTERN int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n); +CEED_EXTERN int CeedHouseholderApplyQ(CeedScalar *mat_A, const CeedScalar *mat_Q, const CeedScalar *tau, CeedTransposeMode t_mode, CeedInt m, + CeedInt n, CeedInt k, CeedInt row, CeedInt col); +CEED_EXTERN int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n); +CEED_EXTERN int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *x, CeedScalar *lambda, CeedInt n); #endif diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index aa512569dc..01f1b6f478 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -388,9 +388,6 @@ CEED_EXTERN int CeedBasisDestroy(CeedBasis *basis); CEED_EXTERN int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d); CEED_EXTERN int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_1d); -CEED_EXTERN int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n); -CEED_EXTERN int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n); -CEED_EXTERN int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *x, CeedScalar *lambda, CeedInt n); /** Handle for the user provided CeedQFunction callback function diff --git a/interface/ceed-basis.c b/interface/ceed-basis.c index 7c69c44b34..05807a4c45 100644 --- a/interface/ceed-basis.c +++ b/interface/ceed-basis.c @@ -37,7 +37,7 @@ const CeedBasis CEED_BASIS_COLLOCATED = &ceed_basis_collocated; /** @brief Compute Householder reflection - Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col] + Computes A = (I - b v v^T) A, where A is an mxn matrix indexed as A[i*row + j*col] @param[in,out] A Matrix to apply Householder reflection to, in place @param[in] v Householder vector @@ -61,43 +61,10 @@ static int CeedHouseholderReflect(CeedScalar *A, const CeedScalar *v, CeedScalar return CEED_ERROR_SUCCESS; } -/** - @brief Apply Householder Q matrix - - Compute A = Q A, where Q is mxm and A is mxn. - - @param[in,out] A Matrix to apply Householder Q to, in place - @param[in] Q Householder Q matrix - @param[in] tau Householder scaling factors - @param[in] t_mode Transpose mode for application - @param[in] m Number of rows in A - @param[in] n Number of columns in A - @param[in] k Number of elementary reflectors in Q, kBasisCreateTensorH1) { - Ceed delegate; - CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis")); - - if (!delegate) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1"); - // LCOV_EXCL_STOP - } - - CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis)); - return CEED_ERROR_SUCCESS; - } +int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) { + CeedScalar v[m]; - if (dim < 1) { + // Check matrix shape + if (n > m) { // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value"); + return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m"); // LCOV_EXCL_STOP } - if (num_comp < 1) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); - // LCOV_EXCL_STOP - } + for (CeedInt i = 0; i < n; i++) { + if (i >= m - 1) { // last row of matrix, no reflection needed + tau[i] = 0.; + break; + } + // Calculate Householder vector, magnitude + CeedScalar sigma = 0.0; + v[i] = mat[i + n * i]; + for (CeedInt j = i + 1; j < m; j++) { + v[j] = mat[i + n * j]; + sigma += v[j] * v[j]; + } + CeedScalar norm = sqrt(v[i] * v[i] + sigma); // norm of v[i:m] + CeedScalar R_ii = -copysign(norm, v[i]); + v[i] -= R_ii; + // norm of v[i:m] after modification above and scaling below + // norm = sqrt(v[i]*v[i] + sigma) / v[i]; + // tau = 2 / (norm*norm) + tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma); + for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i]; - if (P_1d < 1) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); - // LCOV_EXCL_STOP + // Apply Householder reflector to lower right panel + CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1); + // Save v + mat[i + n * i] = R_ii; + for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j]; } + return CEED_ERROR_SUCCESS; +} - if (Q_1d < 1) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); - // LCOV_EXCL_STOP - } +/** + @brief Apply Householder Q matrix - CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX; + Compute mat_A = mat_Q mat_A, where mat_Q is mxm and mat_A is mxn. - CeedCall(CeedCalloc(1, basis)); - (*basis)->ceed = ceed; - CeedCall(CeedReference(ceed)); - (*basis)->ref_count = 1; - (*basis)->tensor_basis = 1; - (*basis)->dim = dim; - (*basis)->topo = topo; - (*basis)->num_comp = num_comp; - (*basis)->P_1d = P_1d; - (*basis)->Q_1d = Q_1d; - (*basis)->P = CeedIntPow(P_1d, dim); - (*basis)->Q = CeedIntPow(Q_1d, dim); - (*basis)->Q_comp = 1; - (*basis)->basis_space = 1; // 1 for H^1 space - CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d)); - CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d)); - if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0])); - if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0])); - CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d)); - CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d)); - if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0])); - if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0])); - CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis)); + @param[in,out] mat_A Matrix to apply Householder Q to, in place + @param[in] mat_Q Householder Q matrix + @param[in] tau Householder scaling factors + @param[in] t_mode Transpose mode for application + @param[in] m Number of rows in A + @param[in] n Number of columns in A + @param[in] k Number of elementary reflectors in Q, k= 0; i--) { + if (tau[i] > 0.0) { + v[i] = 1; + for (CeedInt j = i + 1; j < n - 1; j++) { + v[j] = mat_T[i + n * (j + 1)]; + mat_T[i + n * (j + 1)] = 0; + } + CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1); + } + } + + // Reduce sub and super diagonal + CeedInt p = 0, q = 0, itr = 0, max_itr = n * n * n * n; + CeedScalar tol = CEED_EPSILON; + + while (itr < max_itr) { + // Update p, q, size of reduced portions of diagonal + p = 0; + q = 0; + for (CeedInt i = n - 2; i >= 0; i--) { + if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1; + else break; + } + for (CeedInt i = 0; i < n - q - 1; i++) { + if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1; + else break; + } + if (q == n - 1) break; // Finished reducing + + // Reduce tridiagonal portion + CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)]; + CeedScalar d = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2; + CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d)); + CeedScalar x = mat_T[p + n * p] - mu; + CeedScalar z = mat_T[p + n * (p + 1)]; + for (CeedInt k = p; k < n - q - 1; k++) { + // Compute Givens rotation + CeedScalar c = 1, s = 0; + if (fabs(z) > tol) { + if (fabs(z) > fabs(x)) { + CeedScalar tau = -x / z; + s = 1 / sqrt(1 + tau * tau), c = s * tau; + } else { + CeedScalar tau = -z / x; + c = 1 / sqrt(1 + tau * tau), s = c * tau; } - grad_1d[i * P + k] = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx; - interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx; } - c1 = c2; + + // Apply Givens rotation to T + CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n); + CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n); + + // Apply Givens rotation to Q + CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n); + + // Update x, z + if (k < n - q - 2) { + x = mat_T[k + n * (k + 1)]; + z = mat_T[k + n * (k + 2)]; + } } + itr++; + } + + // Save eigenvalues + for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i]; + + // Check convergence + if (itr == max_itr && q < n - 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge"); + // LCOV_EXCL_STOP } - // Pass to CeedBasisCreateTensorH1 - CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis)); -cleanup: - CeedCall(CeedFree(&interp_1d)); - CeedCall(CeedFree(&grad_1d)); - CeedCall(CeedFree(&nodes)); - CeedCall(CeedFree(&q_ref_1d)); - CeedCall(CeedFree(&q_weight_1d)); return CEED_ERROR_SUCCESS; } +CeedPragmaOptimizeOn; /** - @brief Create a non tensor-product basis for \f$H^1\f$ discretizations + @brief Return Simultaneous Diagonalization of two matrices. - @param[in] ceed Ceed object where the CeedBasis will be created - @param[in] topo Topology of element, e.g. hypercube, simplex, ect - @param[in] num_comp Number of field components (1 for scalar fields) - @param[in] num_nodes Total number of nodes - @param[in] num_qpts Total number of quadrature points - @param[in] interp Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points - @param[in] grad Row-major (num_qpts * dim * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points - @param[in] q_ref Array of length num_qpts * dim holding the locations of quadrature points on the reference element - @param[in] q_weight Array of length num_qpts holding the quadrature weights on the reference element - @param[out] basis Address of the variable where the newly created CeedBasis will be stored. + This solves the generalized eigenvalue problem A x = lambda B x, where A and B are symmetric and B is positive definite. + We generate the matrix X and vector Lambda such that X^T A X = Lambda and X^T B X = I. + This is equivalent to the LAPACK routine 'sygv' with TYPE = 1. + + @param[in] ceed Ceed context for error handling + @param[in] mat_A Row-major matrix to be factorized with eigenvalues + @param[in] mat_B Row-major matrix to be factorized to identity + @param[out] mat_X Row-major orthogonal matrix + @param[out] lambda Vector of length n of generalized eigenvalues + @param[in] n Number of rows/columns @return An error code: 0 - success, otherwise - failure - @ref User + @ref Utility **/ -int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, - const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) { - CeedInt P = num_nodes, Q = num_qpts, dim = 0; +CeedPragmaOptimizeOff int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, + CeedInt n) { + CeedScalar *mat_C, *mat_G, *vec_D; + CeedCall(CeedCalloc(n * n, &mat_C)); + CeedCall(CeedCalloc(n * n, &mat_G)); + CeedCall(CeedCalloc(n, &vec_D)); - if (!ceed->BasisCreateH1) { + // Compute B = G D G^T + memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0])); + CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n)); + + // Sort eigenvalues + for (CeedInt i = n - 1; i >= 0; i--) { + for (CeedInt j = 0; j < i; j++) { + if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) { + CeedScalar temp; + temp = vec_D[j]; + vec_D[j] = vec_D[j + 1]; + vec_D[j + 1] = temp; + for (CeedInt k = 0; k < n; k++) { + temp = mat_G[k * n + j]; + mat_G[k * n + j] = mat_G[k * n + j + 1]; + mat_G[k * n + j + 1] = temp; + } + } + } + } + + // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T + // = D^-1/2 G^T A G D^-1/2 + // -- D = D^-1/2 + for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]); + // -- G = G D^-1/2 + // -- C = D^-1/2 G^T + for (CeedInt i = 0; i < n; i++) { + for (CeedInt j = 0; j < n; j++) { + mat_G[i * n + j] *= vec_D[j]; + mat_C[j * n + i] = mat_G[i * n + j]; + } + } + // -- X = (D^-1/2 G^T) A + CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n)); + // -- C = (D^-1/2 G^T A) (G D^-1/2) + CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n)); + + // Compute Q^T C Q = lambda + CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n)); + + // Sort eigenvalues + for (CeedInt i = n - 1; i >= 0; i--) { + for (CeedInt j = 0; j < i; j++) { + if (fabs(lambda[j]) > fabs(lambda[j + 1])) { + CeedScalar temp; + temp = lambda[j]; + lambda[j] = lambda[j + 1]; + lambda[j + 1] = temp; + for (CeedInt k = 0; k < n; k++) { + temp = mat_C[k * n + j]; + mat_C[k * n + j] = mat_C[k * n + j + 1]; + mat_C[k * n + j + 1] = temp; + } + } + } + } + + // Set X = (G D^1/2)^-T Q + // = G D^-1/2 Q + CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n)); + + // Cleanup + CeedCall(CeedFree(&mat_C)); + CeedCall(CeedFree(&mat_G)); + CeedCall(CeedFree(&vec_D)); + return CEED_ERROR_SUCCESS; +} +CeedPragmaOptimizeOn; + +/// @} + +/// ---------------------------------------------------------------------------- +/// CeedBasis Public API +/// ---------------------------------------------------------------------------- +/// @addtogroup CeedBasisUser +/// @{ + +/** + @brief Create a tensor-product basis for H^1 discretizations + + @param[in] ceed Ceed object where the CeedBasis will be created + @param[in] dim Topological dimension + @param[in] num_comp Number of field components (1 for scalar fields) + @param[in] P_1d Number of nodes in one dimension + @param[in] Q_1d Number of quadrature points in one dimension + @param[in] interp_1d Row-major (Q_1d * P_1d) matrix expressing the values of nodal basis functions at quadrature points + @param[in] grad_1d Row-major (Q_1d * P_1d) matrix expressing derivatives of nodal basis functions at quadrature points + @param[in] q_ref_1d Array of length Q_1d holding the locations of quadrature points on the 1D reference element [-1, 1] + @param[in] q_weight_1d Array of length Q_1d holding the quadrature weights on the reference element + @param[out] basis Address of the variable where the newly created CeedBasis will be stored. + + @return An error code: 0 - success, otherwise - failure + + @ref User +**/ +int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, + const CeedScalar *grad_1d, const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis *basis) { + if (!ceed->BasisCreateTensorH1) { Ceed delegate; CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis")); if (!delegate) { // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1"); + return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateTensorH1"); // LCOV_EXCL_STOP } - CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis)); + CeedCall(CeedBasisCreateTensorH1(delegate, dim, num_comp, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis)); return CEED_ERROR_SUCCESS; } + if (dim < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value"); + // LCOV_EXCL_STOP + } + if (num_comp < 1) { // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); // LCOV_EXCL_STOP } - if (num_nodes < 1) { + if (P_1d < 1) { // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); // LCOV_EXCL_STOP } - if (num_qpts < 1) { + if (Q_1d < 1) { // LCOV_EXCL_START return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); // LCOV_EXCL_STOP } - CeedCall(CeedCalloc(1, basis)); - - CeedCall(CeedBasisGetTopologyDimension(topo, &dim)); + CeedElemTopology topo = dim == 1 ? CEED_TOPOLOGY_LINE : dim == 2 ? CEED_TOPOLOGY_QUAD : CEED_TOPOLOGY_HEX; + CeedCall(CeedCalloc(1, basis)); (*basis)->ceed = ceed; CeedCall(CeedReference(ceed)); (*basis)->ref_count = 1; - (*basis)->tensor_basis = 0; + (*basis)->tensor_basis = 1; (*basis)->dim = dim; (*basis)->topo = topo; (*basis)->num_comp = num_comp; - (*basis)->P = P; - (*basis)->Q = Q; + (*basis)->P_1d = P_1d; + (*basis)->Q_1d = Q_1d; + (*basis)->P = CeedIntPow(P_1d, dim); + (*basis)->Q = CeedIntPow(Q_1d, dim); (*basis)->Q_comp = 1; (*basis)->basis_space = 1; // 1 for H^1 space - CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d)); - CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d)); - if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0])); - if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0])); - CeedCall(CeedCalloc(Q * P, &(*basis)->interp)); - CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad)); - if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0])); - if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0])); - CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis)); - return CEED_ERROR_SUCCESS; -} - -/** + CeedCall(CeedCalloc(Q_1d, &(*basis)->q_ref_1d)); + CeedCall(CeedCalloc(Q_1d, &(*basis)->q_weight_1d)); + if (q_ref_1d) memcpy((*basis)->q_ref_1d, q_ref_1d, Q_1d * sizeof(q_ref_1d[0])); + if (q_weight_1d) memcpy((*basis)->q_weight_1d, q_weight_1d, Q_1d * sizeof(q_weight_1d[0])); + CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->interp_1d)); + CeedCall(CeedCalloc(Q_1d * P_1d, &(*basis)->grad_1d)); + if (interp_1d) memcpy((*basis)->interp_1d, interp_1d, Q_1d * P_1d * sizeof(interp_1d[0])); + if (grad_1d) memcpy((*basis)->grad_1d, grad_1d, Q_1d * P_1d * sizeof(grad_1d[0])); + CeedCall(ceed->BasisCreateTensorH1(dim, P_1d, Q_1d, interp_1d, grad_1d, q_ref_1d, q_weight_1d, *basis)); + return CEED_ERROR_SUCCESS; +} + +/** + @brief Create a tensor-product Lagrange basis + + @param[in] ceed Ceed object where the CeedBasis will be created + @param[in] dim Topological dimension of element + @param[in] num_comp Number of field components (1 for scalar fields) + @param[in] P Number of Gauss-Lobatto nodes in one dimension. + The polynomial degree of the resulting Q_k element is k=P-1. + @param[in] Q Number of quadrature points in one dimension. + @param[in] quad_mode Distribution of the Q quadrature points (affects order of accuracy for the quadrature) + @param[out] basis Address of the variable where the newly created CeedBasis will be stored. + + @return An error code: 0 - success, otherwise - failure + + @ref User +**/ +int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis) { + // Allocate + int ierr = CEED_ERROR_SUCCESS, i, j, k; + CeedScalar c1, c2, c3, c4, dx, *nodes, *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d; + + if (dim < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis dimension must be a positive value"); + // LCOV_EXCL_STOP + } + + if (num_comp < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); + // LCOV_EXCL_STOP + } + + if (P < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); + // LCOV_EXCL_STOP + } + + if (Q < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); + // LCOV_EXCL_STOP + } + + // Get Nodes and Weights + CeedCall(CeedCalloc(P * Q, &interp_1d)); + CeedCall(CeedCalloc(P * Q, &grad_1d)); + CeedCall(CeedCalloc(P, &nodes)); + CeedCall(CeedCalloc(Q, &q_ref_1d)); + CeedCall(CeedCalloc(Q, &q_weight_1d)); + if (CeedLobattoQuadrature(P, nodes, NULL) != CEED_ERROR_SUCCESS) goto cleanup; + switch (quad_mode) { + case CEED_GAUSS: + ierr = CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d); + break; + case CEED_GAUSS_LOBATTO: + ierr = CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d); + break; + } + if (ierr != CEED_ERROR_SUCCESS) goto cleanup; + + // Build B, D matrix + // Fornberg, 1998 + for (i = 0; i < Q; i++) { + c1 = 1.0; + c3 = nodes[0] - q_ref_1d[i]; + interp_1d[i * P + 0] = 1.0; + for (j = 1; j < P; j++) { + c2 = 1.0; + c4 = c3; + c3 = nodes[j] - q_ref_1d[i]; + for (k = 0; k < j; k++) { + dx = nodes[j] - nodes[k]; + c2 *= dx; + if (k == j - 1) { + grad_1d[i * P + j] = c1 * (interp_1d[i * P + k] - c4 * grad_1d[i * P + k]) / c2; + interp_1d[i * P + j] = -c1 * c4 * interp_1d[i * P + k] / c2; + } + grad_1d[i * P + k] = (c3 * grad_1d[i * P + k] - interp_1d[i * P + k]) / dx; + interp_1d[i * P + k] = c3 * interp_1d[i * P + k] / dx; + } + c1 = c2; + } + } + // Pass to CeedBasisCreateTensorH1 + CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, basis)); +cleanup: + CeedCall(CeedFree(&interp_1d)); + CeedCall(CeedFree(&grad_1d)); + CeedCall(CeedFree(&nodes)); + CeedCall(CeedFree(&q_ref_1d)); + CeedCall(CeedFree(&q_weight_1d)); + return CEED_ERROR_SUCCESS; +} + +/** + @brief Create a non tensor-product basis for H^1 discretizations + + @param[in] ceed Ceed object where the CeedBasis will be created + @param[in] topo Topology of element, e.g. hypercube, simplex, ect + @param[in] num_comp Number of field components (1 for scalar fields) + @param[in] num_nodes Total number of nodes + @param[in] num_qpts Total number of quadrature points + @param[in] interp Row-major (num_qpts * num_nodes) matrix expressing the values of nodal basis functions at quadrature points + @param[in] grad Row-major (num_qpts * dim * num_nodes) matrix expressing derivatives of nodal basis functions at quadrature points + @param[in] q_ref Array of length num_qpts * dim holding the locations of quadrature points on the reference element + @param[in] q_weight Array of length num_qpts holding the quadrature weights on the reference element + @param[out] basis Address of the variable where the newly created CeedBasis will be stored. + + @return An error code: 0 - success, otherwise - failure + + @ref User +**/ +int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, + const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis *basis) { + CeedInt P = num_nodes, Q = num_qpts, dim = 0; + + if (!ceed->BasisCreateH1) { + Ceed delegate; + CeedCall(CeedGetObjectDelegate(ceed, &delegate, "Basis")); + + if (!delegate) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support BasisCreateH1"); + // LCOV_EXCL_STOP + } + + CeedCall(CeedBasisCreateH1(delegate, topo, num_comp, num_nodes, num_qpts, interp, grad, q_ref, q_weight, basis)); + return CEED_ERROR_SUCCESS; + } + + if (num_comp < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 component"); + // LCOV_EXCL_STOP + } + + if (num_nodes < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 node"); + // LCOV_EXCL_STOP + } + + if (num_qpts < 1) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_DIMENSION, "Basis must have at least 1 quadrature point"); + // LCOV_EXCL_STOP + } + + CeedCall(CeedCalloc(1, basis)); + + CeedCall(CeedBasisGetTopologyDimension(topo, &dim)); + + (*basis)->ceed = ceed; + CeedCall(CeedReference(ceed)); + (*basis)->ref_count = 1; + (*basis)->tensor_basis = 0; + (*basis)->dim = dim; + (*basis)->topo = topo; + (*basis)->num_comp = num_comp; + (*basis)->P = P; + (*basis)->Q = Q; + (*basis)->Q_comp = 1; + (*basis)->basis_space = 1; // 1 for H^1 space + CeedCall(CeedCalloc(Q * dim, &(*basis)->q_ref_1d)); + CeedCall(CeedCalloc(Q, &(*basis)->q_weight_1d)); + if (q_ref) memcpy((*basis)->q_ref_1d, q_ref, Q * dim * sizeof(q_ref[0])); + if (q_weight) memcpy((*basis)->q_weight_1d, q_weight, Q * sizeof(q_weight[0])); + CeedCall(CeedCalloc(Q * P, &(*basis)->interp)); + CeedCall(CeedCalloc(dim * Q * P, &(*basis)->grad)); + if (interp) memcpy((*basis)->interp, interp, Q * P * sizeof(interp[0])); + if (grad) memcpy((*basis)->grad, grad, dim * Q * P * sizeof(grad[0])); + CeedCall(ceed->BasisCreateH1(topo, dim, P, Q, interp, grad, q_ref, q_weight, *basis)); + return CEED_ERROR_SUCCESS; +} + +/** @brief Create a non tensor-product basis for \f$H(\mathrm{div})\f$ discretizations @param[in] ceed Ceed object where the CeedBasis will be created @@ -885,10 +1173,12 @@ int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, CeedInt num_comp, Ceed /** @brief Create a CeedBasis for projection from the nodes of `basis_from` to the nodes of `basis_to`. - Only `CEED_EVAL_INTERP` and `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`. - The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR + + Only `CEED_EVAL_INTERP` and `CEED_EVAL_GRAD` will be valid for the new basis, `basis_project`. + The interpolation is given by `interp_project = interp_to^+ * interp_from`, where the pesudoinverse `interp_to^+` is given by QR factorization. The gradient is given by `grad_project = interp_to^+ * grad_from`. Note: `basis_from` and `basis_to` must have compatible quadrature -spaces. Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has. If +spaces. + Note: `basis_project` will have the same number of components as `basis_from`, regardless of the number of components that `basis_to` has. If `basis_from` has 3 components and `basis_to` has 5 components, then `basis_project` will have 3 components. @param[in] basis_from CeedBasis to prolong from @@ -943,10 +1233,9 @@ int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasi /** @brief Copy the pointer to a CeedBasis. - Both pointers should be destroyed with `CeedBasisDestroy()`. - Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis. - This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis. + Note: If the value of `basis_copy` passed into this function is non-NULL, then it is assumed that `basis_copy` is a pointer to a CeedBasis. + This CeedBasis will be destroyed if `basis_copy` is the only reference to this CeedBasis. @param[in] basis CeedBasis to copy reference to @param[in,out] basis_copy Variable to store copied reference @@ -1544,291 +1833,4 @@ int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, CeedScalar *q_weight_ return CEED_ERROR_SUCCESS; } -/** - @brief Return QR Factorization of a matrix - - @param[in] ceed Ceed context for error handling - @param[in,out] mat Row-major matrix to be factorized in place - @param[in,out] tau Vector of length m of scaling factors - @param[in] m Number of rows - @param[in] n Number of columns - - @return An error code: 0 - success, otherwise - failure - - @ref Utility -**/ -int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, CeedInt m, CeedInt n) { - CeedScalar v[m]; - - // Check matrix shape - if (n > m) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute QR factorization with n > m"); - // LCOV_EXCL_STOP - } - - for (CeedInt i = 0; i < n; i++) { - if (i >= m - 1) { // last row of matrix, no reflection needed - tau[i] = 0.; - break; - } - // Calculate Householder vector, magnitude - CeedScalar sigma = 0.0; - v[i] = mat[i + n * i]; - for (CeedInt j = i + 1; j < m; j++) { - v[j] = mat[i + n * j]; - sigma += v[j] * v[j]; - } - CeedScalar norm = sqrt(v[i] * v[i] + sigma); // norm of v[i:m] - CeedScalar R_ii = -copysign(norm, v[i]); - v[i] -= R_ii; - // norm of v[i:m] after modification above and scaling below - // norm = sqrt(v[i]*v[i] + sigma) / v[i]; - // tau = 2 / (norm*norm) - tau[i] = 2 * v[i] * v[i] / (v[i] * v[i] + sigma); - for (CeedInt j = i + 1; j < m; j++) v[j] /= v[i]; - - // Apply Householder reflector to lower right panel - CeedHouseholderReflect(&mat[i * n + i + 1], &v[i], tau[i], m - i, n - i - 1, n, 1); - // Save v - mat[i + n * i] = R_ii; - for (CeedInt j = i + 1; j < m; j++) mat[i + n * j] = v[j]; - } - return CEED_ERROR_SUCCESS; -} - -/** - @brief Return symmetric Schur decomposition of the symmetric matrix mat via symmetric QR factorization - - @param[in] ceed Ceed context for error handling - @param[in,out] mat Row-major matrix to be factorized in place - @param[out] lambda Vector of length n of eigenvalues - @param[in] n Number of rows/columns - - @return An error code: 0 - success, otherwise - failure - - @ref Utility -**/ -CeedPragmaOptimizeOff int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, CeedScalar *lambda, CeedInt n) { - // Check bounds for clang-tidy - if (n < 2) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot compute symmetric Schur decomposition of scalars"); - // LCOV_EXCL_STOP - } - - CeedScalar v[n - 1], tau[n - 1], mat_T[n * n]; - - // Copy mat to mat_T and set mat to I - memcpy(mat_T, mat, n * n * sizeof(mat[0])); - for (CeedInt i = 0; i < n; i++) { - for (CeedInt j = 0; j < n; j++) mat[j + n * i] = (i == j) ? 1 : 0; - } - - // Reduce to tridiagonal - for (CeedInt i = 0; i < n - 1; i++) { - // Calculate Householder vector, magnitude - CeedScalar sigma = 0.0; - v[i] = mat_T[i + n * (i + 1)]; - for (CeedInt j = i + 1; j < n - 1; j++) { - v[j] = mat_T[i + n * (j + 1)]; - sigma += v[j] * v[j]; - } - CeedScalar norm = sqrt(v[i] * v[i] + sigma); // norm of v[i:n-1] - CeedScalar R_ii = -copysign(norm, v[i]); - v[i] -= R_ii; - // norm of v[i:m] after modification above and scaling below - // norm = sqrt(v[i]*v[i] + sigma) / v[i]; - // tau = 2 / (norm*norm) - tau[i] = i == n - 2 ? 2 : 2 * v[i] * v[i] / (v[i] * v[i] + sigma); - for (CeedInt j = i + 1; j < n - 1; j++) v[j] /= v[i]; - - // Update sub and super diagonal - for (CeedInt j = i + 2; j < n; j++) { - mat_T[i + n * j] = 0; - mat_T[j + n * i] = 0; - } - // Apply symmetric Householder reflector to lower right panel - CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1); - CeedHouseholderReflect(&mat_T[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), 1, n); - - // Save v - mat_T[i + n * (i + 1)] = R_ii; - mat_T[(i + 1) + n * i] = R_ii; - for (CeedInt j = i + 1; j < n - 1; j++) { - mat_T[i + n * (j + 1)] = v[j]; - } - } - // Backwards accumulation of Q - for (CeedInt i = n - 2; i >= 0; i--) { - if (tau[i] > 0.0) { - v[i] = 1; - for (CeedInt j = i + 1; j < n - 1; j++) { - v[j] = mat_T[i + n * (j + 1)]; - mat_T[i + n * (j + 1)] = 0; - } - CeedHouseholderReflect(&mat[(i + 1) + n * (i + 1)], &v[i], tau[i], n - (i + 1), n - (i + 1), n, 1); - } - } - - // Reduce sub and super diagonal - CeedInt p = 0, q = 0, itr = 0, max_itr = n * n * n * n; - CeedScalar tol = CEED_EPSILON; - - while (itr < max_itr) { - // Update p, q, size of reduced portions of diagonal - p = 0; - q = 0; - for (CeedInt i = n - 2; i >= 0; i--) { - if (fabs(mat_T[i + n * (i + 1)]) < tol) q += 1; - else break; - } - for (CeedInt i = 0; i < n - q - 1; i++) { - if (fabs(mat_T[i + n * (i + 1)]) < tol) p += 1; - else break; - } - if (q == n - 1) break; // Finished reducing - - // Reduce tridiagonal portion - CeedScalar t_nn = mat_T[(n - 1 - q) + n * (n - 1 - q)], t_nnm1 = mat_T[(n - 2 - q) + n * (n - 1 - q)]; - CeedScalar d = (mat_T[(n - 2 - q) + n * (n - 2 - q)] - t_nn) / 2; - CeedScalar mu = t_nn - t_nnm1 * t_nnm1 / (d + copysign(sqrt(d * d + t_nnm1 * t_nnm1), d)); - CeedScalar x = mat_T[p + n * p] - mu; - CeedScalar z = mat_T[p + n * (p + 1)]; - for (CeedInt k = p; k < n - q - 1; k++) { - // Compute Givens rotation - CeedScalar c = 1, s = 0; - if (fabs(z) > tol) { - if (fabs(z) > fabs(x)) { - CeedScalar tau = -x / z; - s = 1 / sqrt(1 + tau * tau), c = s * tau; - } else { - CeedScalar tau = -z / x; - c = 1 / sqrt(1 + tau * tau), s = c * tau; - } - } - - // Apply Givens rotation to T - CeedGivensRotation(mat_T, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n); - CeedGivensRotation(mat_T, c, s, CEED_TRANSPOSE, k, k + 1, n, n); - - // Apply Givens rotation to Q - CeedGivensRotation(mat, c, s, CEED_NOTRANSPOSE, k, k + 1, n, n); - - // Update x, z - if (k < n - q - 2) { - x = mat_T[k + n * (k + 1)]; - z = mat_T[k + n * (k + 2)]; - } - } - itr++; - } - - // Save eigenvalues - for (CeedInt i = 0; i < n; i++) lambda[i] = mat_T[i + n * i]; - - // Check convergence - if (itr == max_itr && q < n - 1) { - // LCOV_EXCL_START - return CeedError(ceed, CEED_ERROR_MINOR, "Symmetric QR failed to converge"); - // LCOV_EXCL_STOP - } - return CEED_ERROR_SUCCESS; -} -CeedPragmaOptimizeOn - - /** - @brief Return Simultaneous Diagonalization of two matrices. - This solves the generalized eigenvalue problem \f$A x = \lambda B x\f$, where \f$A\f$ and \f$B\f$ are symmetric and \f$B\f$ is positive - definite. We generate the matrix \f$X\f$ and diagonal \f$\Lambda\f$ such that \f$X^T A X = \Lambda\f$ and \f$X^T B X = I\f$. This is equivalent to - the LAPACK routine 'sygv' with `TYPE = 1`. - - @param[in] ceed Ceed context for error handling - @param[in] mat_A Row-major matrix to be factorized with eigenvalues - @param[in] mat_B Row-major matrix to be factorized to identity - @param[out] mat_X Row-major orthogonal matrix - @param[out] lambda Vector of length n of generalized eigenvalues - @param[in] n Number of rows/columns - - @return An error code: 0 - success, otherwise - failure - - @ref Utility - **/ - CeedPragmaOptimizeOff int - CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, CeedScalar *mat_B, CeedScalar *mat_X, CeedScalar *lambda, CeedInt n) { - CeedScalar *mat_C, *mat_G, *vec_D; - CeedCall(CeedCalloc(n * n, &mat_C)); - CeedCall(CeedCalloc(n * n, &mat_G)); - CeedCall(CeedCalloc(n, &vec_D)); - - // Compute B = G D G^T - memcpy(mat_G, mat_B, n * n * sizeof(mat_B[0])); - CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_G, vec_D, n)); - - // Sort eigenvalues - for (CeedInt i = n - 1; i >= 0; i--) { - for (CeedInt j = 0; j < i; j++) { - if (fabs(vec_D[j]) > fabs(vec_D[j + 1])) { - CeedScalar temp; - temp = vec_D[j]; - vec_D[j] = vec_D[j + 1]; - vec_D[j + 1] = temp; - for (CeedInt k = 0; k < n; k++) { - temp = mat_G[k * n + j]; - mat_G[k * n + j] = mat_G[k * n + j + 1]; - mat_G[k * n + j + 1] = temp; - } - } - } - } - - // Compute C = (G D^1/2)^-1 A (G D^1/2)^-T - // = D^-1/2 G^T A G D^-1/2 - // -- D = D^-1/2 - for (CeedInt i = 0; i < n; i++) vec_D[i] = 1. / sqrt(vec_D[i]); - // -- G = G D^-1/2 - // -- C = D^-1/2 G^T - for (CeedInt i = 0; i < n; i++) { - for (CeedInt j = 0; j < n; j++) { - mat_G[i * n + j] *= vec_D[j]; - mat_C[j * n + i] = mat_G[i * n + j]; - } - } - // -- X = (D^-1/2 G^T) A - CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_C, (const CeedScalar *)mat_A, mat_X, n, n, n)); - // -- C = (D^-1/2 G^T A) (G D^-1/2) - CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_X, (const CeedScalar *)mat_G, mat_C, n, n, n)); - - // Compute Q^T C Q = lambda - CeedCall(CeedSymmetricSchurDecomposition(ceed, mat_C, lambda, n)); - - // Sort eigenvalues - for (CeedInt i = n - 1; i >= 0; i--) { - for (CeedInt j = 0; j < i; j++) { - if (fabs(lambda[j]) > fabs(lambda[j + 1])) { - CeedScalar temp; - temp = lambda[j]; - lambda[j] = lambda[j + 1]; - lambda[j + 1] = temp; - for (CeedInt k = 0; k < n; k++) { - temp = mat_C[k * n + j]; - mat_C[k * n + j] = mat_C[k * n + j + 1]; - mat_C[k * n + j + 1] = temp; - } - } - } - } - - // Set X = (G D^1/2)^-T Q - // = G D^-1/2 Q - CeedCall(CeedMatrixMatrixMultiply(ceed, (const CeedScalar *)mat_G, (const CeedScalar *)mat_C, mat_X, n, n, n)); - - // Cleanup - CeedCall(CeedFree(&mat_C)); - CeedCall(CeedFree(&mat_G)); - CeedCall(CeedFree(&vec_D)); - return CEED_ERROR_SUCCESS; -} -CeedPragmaOptimizeOn - - /// @} +/// @} diff --git a/interface/ceed-fortran.c b/interface/ceed-fortran.c index 3ef5acff35..017931560d 100644 --- a/interface/ceed-fortran.c +++ b/interface/ceed-fortran.c @@ -452,28 +452,6 @@ CEED_EXTERN void fCeedBasisCreateH1(int *ceed, int *topo, int *num_comp, int *nn #define fCeedBasisView FORTRAN_NAME(ceedbasisview, CEEDBASISVIEW) CEED_EXTERN void fCeedBasisView(int *basis, int *err) { *err = CeedBasisView(CeedBasis_dict[*basis], stdout); } -#define fCeedQRFactorization FORTRAN_NAME(ceedqrfactorization, CEEDQRFACTORIZATION) -CEED_EXTERN void fCeedQRFactorization(int *ceed, CeedScalar *mat, CeedScalar *tau, int *m, int *n, int *err) { - *err = CeedQRFactorization(Ceed_dict[*ceed], mat, tau, *m, *n); -} - -#define fCeedHouseholderApplyQ FORTRAN_NAME(ceedhouseholderapplyq, CEEDHOUSEHOLDERAPPLYQ) -CEED_EXTERN void fCeedHouseholderApplyQ(CeedScalar *A, CeedScalar *Q, CeedScalar *tau, int *t_mode, int *m, int *n, int *k, int *row, int *col, - int *err) { - *err = CeedHouseholderApplyQ(A, Q, tau, (CeedTransposeMode)*t_mode, *m, *n, *k, *row, *col); -} - -#define fCeedSymmetricSchurDecomposition FORTRAN_NAME(ceedsymmetricschurdecomposition, CEEDSYMMETRICSCHURDECOMPOSITION) -CEED_EXTERN void fCeedSymmetricSchurDecomposition(int *ceed, CeedScalar *mat, CeedScalar *lambda, int *n, int *err) { - *err = CeedSymmetricSchurDecomposition(Ceed_dict[*ceed], mat, lambda, *n); -} - -#define fCeedSimultaneousDiagonalization FORTRAN_NAME(ceedsimultaneousdiagonalization, CEEDSIMULTANEOUSDIAGONALIZATION) -CEED_EXTERN void fCeedSimultaneousDiagonalization(int *ceed, CeedScalar *matA, CeedScalar *matB, CeedScalar *x, CeedScalar *lambda, int *n, - int *err) { - *err = CeedSimultaneousDiagonalization(Ceed_dict[*ceed], matA, matB, x, lambda, *n); -} - #define fCeedBasisGetCollocatedGrad FORTRAN_NAME(ceedbasisgetcollocatedgrad, CEEDBASISGETCOLLOCATEDGRAD) CEED_EXTERN void fCeedBasisGetCollocatedGrad(int *basis, CeedScalar *colo_grad_1d, int *err) { *err = CeedBasisGetCollocatedGrad(CeedBasis_dict[*basis], colo_grad_1d); diff --git a/julia/LibCEED.jl/src/generated/libceed_bindings.jl b/julia/LibCEED.jl/src/generated/libceed_bindings.jl index c69153f481..7e954c2d40 100644 --- a/julia/LibCEED.jl/src/generated/libceed_bindings.jl +++ b/julia/LibCEED.jl/src/generated/libceed_bindings.jl @@ -445,18 +445,6 @@ function CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d) ccall((:CeedLobattoQuadrature, libceed), Cint, (CeedInt, Ptr{CeedScalar}, Ptr{CeedScalar}), Q, q_ref_1d, q_weight_1d) end -function CeedQRFactorization(ceed, mat, tau, m, n) - ccall((:CeedQRFactorization, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt, CeedInt), ceed, mat, tau, m, n) -end - -function CeedSymmetricSchurDecomposition(ceed, mat, lambda, n) - ccall((:CeedSymmetricSchurDecomposition, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt), ceed, mat, lambda, n) -end - -function CeedSimultaneousDiagonalization(ceed, mat_A, mat_B, x, lambda, n) - ccall((:CeedSimultaneousDiagonalization, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt), ceed, mat_A, mat_B, x, lambda, n) -end - # typedef int ( * CeedQFunctionUser ) ( void * ctx , const CeedInt Q , const CeedScalar * const * in , CeedScalar * const * out ) const CeedQFunctionUser = Ptr{Cvoid} @@ -736,10 +724,26 @@ function CeedOperatorContextSetDouble(op, field_label, values) ccall((:CeedOperatorContextSetDouble, libceed), Cint, (CeedOperator, CeedContextFieldLabel, Ptr{Cdouble}), op, field_label, values) end +function CeedOperatorContextGetDoubleRead(op, field_label, num_values, values) + ccall((:CeedOperatorContextGetDoubleRead, libceed), Cint, (CeedOperator, CeedContextFieldLabel, Ptr{Csize_t}, Ptr{Ptr{Cdouble}}), op, field_label, num_values, values) +end + +function CeedOperatorContextRestoreDoubleRead(op, field_label, values) + ccall((:CeedOperatorContextRestoreDoubleRead, libceed), Cint, (CeedOperator, CeedContextFieldLabel, Ptr{Ptr{Cdouble}}), op, field_label, values) +end + function CeedOperatorContextSetInt32(op, field_label, values) ccall((:CeedOperatorContextSetInt32, libceed), Cint, (CeedOperator, CeedContextFieldLabel, Ptr{Cint}), op, field_label, values) end +function CeedOperatorContextGetInt32Read(op, field_label, num_values, values) + ccall((:CeedOperatorContextGetInt32Read, libceed), Cint, (CeedOperator, CeedContextFieldLabel, Ptr{Csize_t}, Ptr{Ptr{Cint}}), op, field_label, num_values, values) +end + +function CeedOperatorContextRestoreInt32Read(op, field_label, values) + ccall((:CeedOperatorContextRestoreInt32Read, libceed), Cint, (CeedOperator, CeedContextFieldLabel, Ptr{Ptr{Cint}}), op, field_label, values) +end + function CeedOperatorApply(op, in, out, request) ccall((:CeedOperatorApply, libceed), Cint, (CeedOperator, CeedVector, CeedVector, Ptr{CeedRequest}), op, in, out, request) end @@ -752,6 +756,10 @@ function CeedOperatorDestroy(op) ccall((:CeedOperatorDestroy, libceed), Cint, (Ptr{CeedOperator},), op) end +function CeedOperatorGetFieldByName(op, field_name, op_field) + ccall((:CeedOperatorGetFieldByName, libceed), Cint, (CeedOperator, Ptr{Cchar}, Ptr{CeedOperatorField}), op, field_name, op_field) +end + function CeedOperatorFieldGetName(op_field, field_name) ccall((:CeedOperatorFieldGetName, libceed), Cint, (CeedOperatorField, Ptr{Ptr{Cchar}}), op_field, field_name) end @@ -985,10 +993,6 @@ function CeedBasisGetCollocatedGrad(basis, colo_grad_1d) ccall((:CeedBasisGetCollocatedGrad, libceed), Cint, (CeedBasis, Ptr{CeedScalar}), basis, colo_grad_1d) end -function CeedHouseholderApplyQ(A, Q, tau, t_mode, m, n, k, row, col) - ccall((:CeedHouseholderApplyQ, libceed), Cint, (Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedTransposeMode, CeedInt, CeedInt, CeedInt, CeedInt, CeedInt), A, Q, tau, t_mode, m, n, k, row, col) -end - function CeedBasisIsTensor(basis, is_tensor) ccall((:CeedBasisIsTensor, libceed), Cint, (CeedBasis, Ptr{Bool}), basis, is_tensor) end @@ -1161,14 +1165,38 @@ function CeedQFunctionContextSetGeneric(ctx, field_label, field_type, value) ccall((:CeedQFunctionContextSetGeneric, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, CeedContextFieldType, Ptr{Cvoid}), ctx, field_label, field_type, value) end +function CeedQFunctionContextGetGenericRead(ctx, field_label, field_type, num_values, value) + ccall((:CeedQFunctionContextGetGenericRead, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, CeedContextFieldType, Ptr{Csize_t}, Ptr{Cvoid}), ctx, field_label, field_type, num_values, value) +end + +function CeedQFunctionContextRestoreGenericRead(ctx, field_label, field_type, value) + ccall((:CeedQFunctionContextRestoreGenericRead, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, CeedContextFieldType, Ptr{Cvoid}), ctx, field_label, field_type, value) +end + function CeedQFunctionContextSetDouble(ctx, field_label, values) ccall((:CeedQFunctionContextSetDouble, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, Ptr{Cdouble}), ctx, field_label, values) end +function CeedQFunctionContextGetDoubleRead(ctx, field_label, num_values, values) + ccall((:CeedQFunctionContextGetDoubleRead, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, Ptr{Csize_t}, Ptr{Ptr{Cdouble}}), ctx, field_label, num_values, values) +end + +function CeedQFunctionContextRestoreDoubleRead(ctx, field_label, values) + ccall((:CeedQFunctionContextRestoreDoubleRead, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, Ptr{Ptr{Cdouble}}), ctx, field_label, values) +end + function CeedQFunctionContextSetInt32(ctx, field_label, values) ccall((:CeedQFunctionContextSetInt32, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, Ptr{Cint}), ctx, field_label, values) end +function CeedQFunctionContextGetInt32Read(ctx, field_label, num_values, values) + ccall((:CeedQFunctionContextGetInt32Read, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, Ptr{Csize_t}, Ptr{Ptr{Cint}}), ctx, field_label, num_values, values) +end + +function CeedQFunctionContextRestoreInt32Read(ctx, field_label, values) + ccall((:CeedQFunctionContextRestoreInt32Read, libceed), Cint, (CeedQFunctionContext, CeedContextFieldLabel, Ptr{Ptr{Cint}}), ctx, field_label, values) +end + function CeedQFunctionContextGetDataDestroy(ctx, f_mem_type, f) ccall((:CeedQFunctionContextGetDataDestroy, libceed), Cint, (CeedQFunctionContext, Ptr{CeedMemType}, Ptr{CeedQFunctionContextDataDestroyUser}), ctx, f_mem_type, f) end @@ -1221,12 +1249,16 @@ function CeedOperatorAssemblyDataCreate(ceed, op, data) ccall((:CeedOperatorAssemblyDataCreate, libceed), Cint, (Ceed, CeedOperator, Ptr{CeedOperatorAssemblyData}), ceed, op, data) end -function CeedOperatorAssemblyDataGetEvalModes(data, num_eval_mode_in, eval_mode_in, num_eval_mode_out, eval_mode_out) - ccall((:CeedOperatorAssemblyDataGetEvalModes, libceed), Cint, (CeedOperatorAssemblyData, Ptr{CeedInt}, Ptr{Ptr{CeedEvalMode}}, Ptr{CeedInt}, Ptr{Ptr{CeedEvalMode}}), data, num_eval_mode_in, eval_mode_in, num_eval_mode_out, eval_mode_out) +function CeedOperatorAssemblyDataGetEvalModes(data, num_active_bases, num_eval_modes_in, eval_modes_in, eval_mode_offsets_in, num_eval_modes_out, eval_modes_out, eval_mode_offsets_out, num_output_components) + ccall((:CeedOperatorAssemblyDataGetEvalModes, libceed), Cint, (CeedOperatorAssemblyData, Ptr{CeedInt}, Ptr{Ptr{CeedInt}}, Ptr{Ptr{Ptr{CeedEvalMode}}}, Ptr{Ptr{Ptr{CeedSize}}}, Ptr{Ptr{CeedInt}}, Ptr{Ptr{Ptr{CeedEvalMode}}}, Ptr{Ptr{Ptr{CeedSize}}}, Ptr{CeedSize}), data, num_active_bases, num_eval_modes_in, eval_modes_in, eval_mode_offsets_in, num_eval_modes_out, eval_modes_out, eval_mode_offsets_out, num_output_components) end -function CeedOperatorAssemblyDataGetBases(data, basis_in, B_in, basis_out, B_out) - ccall((:CeedOperatorAssemblyDataGetBases, libceed), Cint, (CeedOperatorAssemblyData, Ptr{CeedBasis}, Ptr{Ptr{CeedScalar}}, Ptr{CeedBasis}, Ptr{Ptr{CeedScalar}}), data, basis_in, B_in, basis_out, B_out) +function CeedOperatorAssemblyDataGetBases(data, num_active_bases, active_bases, assembled_bases_in, assembled_bases_out) + ccall((:CeedOperatorAssemblyDataGetBases, libceed), Cint, (CeedOperatorAssemblyData, Ptr{CeedInt}, Ptr{Ptr{CeedBasis}}, Ptr{Ptr{Ptr{CeedScalar}}}, Ptr{Ptr{Ptr{CeedScalar}}}), data, num_active_bases, active_bases, assembled_bases_in, assembled_bases_out) +end + +function CeedOperatorAssemblyDataGetElemRestrictions(data, num_active_elem_rstrs, active_elem_rstrs) + ccall((:CeedOperatorAssemblyDataGetElemRestrictions, libceed), Cint, (CeedOperatorAssemblyData, Ptr{CeedInt}, Ptr{Ptr{CeedElemRestriction}}), data, num_active_elem_rstrs, active_elem_rstrs) end function CeedOperatorAssemblyDataDestroy(data) @@ -1281,6 +1313,22 @@ function CeedMatrixMatrixMultiply(ceed, mat_A, mat_B, mat_C, m, n, kk) ccall((:CeedMatrixMatrixMultiply, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt, CeedInt, CeedInt), ceed, mat_A, mat_B, mat_C, m, n, kk) end +function CeedQRFactorization(ceed, mat, tau, m, n) + ccall((:CeedQRFactorization, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt, CeedInt), ceed, mat, tau, m, n) +end + +function CeedHouseholderApplyQ(mat_A, mat_Q, tau, t_mode, m, n, k, row, col) + ccall((:CeedHouseholderApplyQ, libceed), Cint, (Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedTransposeMode, CeedInt, CeedInt, CeedInt, CeedInt, CeedInt), mat_A, mat_Q, tau, t_mode, m, n, k, row, col) +end + +function CeedSymmetricSchurDecomposition(ceed, mat, lambda, n) + ccall((:CeedSymmetricSchurDecomposition, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt), ceed, mat, lambda, n) +end + +function CeedSimultaneousDiagonalization(ceed, mat_A, mat_B, x, lambda, n) + ccall((:CeedSimultaneousDiagonalization, libceed), Cint, (Ceed, Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, Ptr{CeedScalar}, CeedInt), ceed, mat_A, mat_B, x, lambda, n) +end + # Skipping MacroDefinition: CEED_EXTERN extern CEED_VISIBILITY ( default ) # Skipping MacroDefinition: CEED_QFUNCTION_HELPER CEED_QFUNCTION_ATTR static inline diff --git a/python/ceed.py b/python/ceed.py index 030a033cbf..6274c66f8a 100644 --- a/python/ceed.py +++ b/python/ceed.py @@ -177,111 +177,6 @@ def lobatto_quadrature(self, q): return qref1d, qweight1d - # QR factorization - def qr_factorization(self, mat, tau, m, n): - """Return QR Factorization of a matrix. - - Args: - ceed: Ceed context currently in use - *mat: Numpy array holding the row-major matrix to be factorized in place - *tau: Numpy array to hold the vector of lengt m of scaling factors - m: number of rows - n: numbef of columns""" - - # Setup arguments - mat_pointer = ffi.new("CeedScalar *") - mat_pointer = ffi.cast( - "CeedScalar *", - mat.__array_interface__['data'][0]) - - tau_pointer = ffi.new("CeedScalar *") - tau_pointer = ffi.cast( - "CeedScalar *", - tau.__array_interface__['data'][0]) - - # libCEED call - err_code = lib.CeedQRFactorization( - self._pointer[0], mat_pointer, tau_pointer, m, n) - self._check_error(err_code) - - return mat, tau - - # Symmetric Schur decomposition - def symmetric_schur_decomposition(self, mat, n): - """Return symmetric Schur decomposition of a symmetric matrix - via symmetric QR factorization. - - Args: - ceed: Ceed context currently in use - *mat: Numpy array holding the row-major matrix to be factorized in place - n: number of rows/columns - - Returns: - lbda: Numpy array of length n holding eigenvalues""" - - # Setup arguments - mat_pointer = ffi.new("CeedScalar *") - mat_pointer = ffi.cast( - "CeedScalar *", - mat.__array_interface__['data'][0]) - - lbda = np.empty(n, dtype=scalar_types[lib.CEED_SCALAR_TYPE]) - l_pointer = ffi.new("CeedScalar *") - l_pointer = ffi.cast( - "CeedScalar *", - lbda.__array_interface__['data'][0]) - - # libCEED call - err_code = lib.CeedSymmetricSchurDecomposition( - self._pointer[0], mat_pointer, l_pointer, n) - self._check_error(err_code) - - return lbda - - # Simultaneous Diagonalization - def simultaneous_diagonalization(self, matA, matB, n): - """Return Simultaneous Diagonalization of two matrices. - - Args: - ceed: Ceed context currently in use - *matA: Numpy array holding the row-major matrix to be factorized with - eigenvalues - *matB: Numpy array holding the row-major matrix to be factorized to identity - n: number of rows/columns - - Returns: - (x, lbda): Numpy array holding the row-major orthogonal matrix and - Numpy array holding the vector of length n of generalized - eigenvalues""" - - # Setup arguments - matA_pointer = ffi.new("CeedScalar *") - matA_pointer = ffi.cast( - "CeedScalar *", - matA.__array_interface__['data'][0]) - - matB_pointer = ffi.new("CeedScalar *") - matB_pointer = ffi.cast( - "CeedScalar *", - matB.__array_interface__['data'][0]) - - lbda = np.empty(n, dtype=scalar_types[lib.CEED_SCALAR_TYPE]) - l_pointer = ffi.new("CeedScalar *") - l_pointer = ffi.cast( - "CeedScalar *", - lbda.__array_interface__['data'][0]) - - x = np.empty(n * n, dtype=scalar_types[lib.CEED_SCALAR_TYPE]) - x_pointer = ffi.new("CeedScalar *") - x_pointer = ffi.cast("CeedScalar *", x.__array_interface__['data'][0]) - - # libCEED call - err_code = lib.CeedSimultaneousDiagonalization(self._pointer[0], matA_pointer, matB_pointer, - x_pointer, l_pointer, n) - self._check_error(err_code) - - return x, lbda - # --- libCEED objects --- # CeedVector diff --git a/python/tests/test-3-basis.py b/python/tests/test-3-basis.py index 76b97afcf8..2a08c23ed3 100644 --- a/python/tests/test-3-basis.py +++ b/python/tests/test-3-basis.py @@ -59,88 +59,6 @@ def test_300(ceed_resource, capsys): assert not stderr assert stdout == ref_stdout -# ------------------------------------------------------------------------------- -# Test QR factorization -# ------------------------------------------------------------------------------- - - -def test_301(ceed_resource): - ceed = libceed.Ceed(ceed_resource) - - m = 4 - n = 3 - a = np.array([1, -1, 4, 1, 4, -2, 1, 4, 2, 1, -1, 0], - dtype=ceed.scalar_type()) - qr = np.array([1, -1, 4, 1, 4, -2, 1, 4, 2, 1, -1, 0], - dtype=ceed.scalar_type()) - tau = np.empty(3, dtype=ceed.scalar_type()) - - qr, tau = ceed.qr_factorization(qr, tau, m, n) - np_qr, np_tau = np.linalg.qr(a.reshape(m, n), mode="raw") - - for i in range(n): - assert abs(tau[i] - np_tau[i]) < TOL - - qr = qr.reshape(m, n) - for i in range(m): - for j in range(n): - assert abs(qr[i, j] - np_qr[j, i]) < TOL - -# ------------------------------------------------------------------------------- -# Test Symmetric Schur Decomposition -# ------------------------------------------------------------------------------- - - -def test_304(ceed_resource): - ceed = libceed.Ceed(ceed_resource) - - A = np.array([0.2, 0.0745355993, -0.0745355993, 0.0333333333, - 0.0745355993, 1., 0.1666666667, -0.0745355993, - -0.0745355993, 0.1666666667, 1., 0.0745355993, - 0.0333333333, -0.0745355993, 0.0745355993, 0.2], - dtype=ceed.scalar_type()) - - Q = A.copy() - - lam = ceed.symmetric_schur_decomposition(Q, 4) - Q = Q.reshape(4, 4) - lam = np.diag(lam) - - Q_lambda_Qt = np.matmul(np.matmul(Q, lam), Q.T) - assert np.max(Q_lambda_Qt - A.reshape(4, 4)) < TOL - -# ------------------------------------------------------------------------------- -# Test Simultaneous Diagonalization -# ------------------------------------------------------------------------------- - - -def test_305(ceed_resource): - ceed = libceed.Ceed(ceed_resource) - - M = np.array([0.2, 0.0745355993, -0.0745355993, 0.0333333333, - 0.0745355993, 1., 0.1666666667, -0.0745355993, - -0.0745355993, 0.1666666667, 1., 0.0745355993, - 0.0333333333, -0.0745355993, 0.0745355993, 0.2], - dtype=ceed.scalar_type()) - K = np.array([3.0333333333, -3.4148928136, 0.4982261470, -0.1166666667, - -3.4148928136, 5.8333333333, -2.9166666667, 0.4982261470, - 0.4982261470, -2.9166666667, 5.8333333333, -3.4148928136, - -0.1166666667, 0.4982261470, -3.4148928136, 3.0333333333], - dtype=ceed.scalar_type()) - - X, lam = ceed.simultaneous_diagonalization(K, M, 4) - X = X.reshape(4, 4) - np.diag(lam) - - M = M.reshape(4, 4) - K = K.reshape(4, 4) - - Xt_M_X = np.matmul(X.T, np.matmul(M, X)) - assert np.max(Xt_M_X - np.identity(4)) < TOL - - Xt_K_X = np.matmul(X.T, np.matmul(K, X)) - assert np.max(Xt_K_X - lam) < TOL - # ------------------------------------------------------------------------------- # Test GetNumNodes and GetNumQuadraturePoints for basis # ------------------------------------------------------------------------------- diff --git a/tests/t301-basis-f.f90 b/tests/t301-basis-f.f90 deleted file mode 100644 index 3496fbfa20..0000000000 --- a/tests/t301-basis-f.f90 +++ /dev/null @@ -1,38 +0,0 @@ -!----------------------------------------------------------------------- - program test - implicit none - include 'ceed/fortran.h' - - integer ceed,err,i,j - real*8 a(12),qr(12),a_qr(12),tau(3) - - character arg*32 - - A = (/ 1, -1, 4, 1, 4, -2, 1, 4, 2, 1, -1, 0 /) - qr = (/ 1, -1, 4, 1, 4, -2, 1, 4, 2, 1, -1, 0 /) - a_qr = (/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /) - - call getarg(1,arg) - - call ceedinit(trim(arg)//char(0),ceed,err) - call ceedqrfactorization(ceed,qr,tau,4,3,err) - do i=1,3 - do j=i,3 - a_qr((i-1)*3+j)=qr((i-1)*3+j) - enddo - enddo - call ceedhouseholderapplyq(a_qr,qr,tau,ceed_notranspose,4,3,3,3,1,err) - - do i=1,12 - if (abs(a(i)-a_qr(i))>1.0D-14) then -! LCOV_EXCL_START - write(*,*) 'Error in QR factorization a_qr(',i,') = ',a_qr(i),& - & ' != a(',i,') = ',a(i) -! LCOV_EXCL_STOP - endif - enddo - - call ceeddestroy(ceed,err) - - end -!----------------------------------------------------------------------- diff --git a/tests/t304-basis-f.f90 b/tests/t304-basis-f.f90 deleted file mode 100644 index 76dab4fc58..0000000000 --- a/tests/t304-basis-f.f90 +++ /dev/null @@ -1,52 +0,0 @@ -!----------------------------------------------------------------------- - program test - implicit none - include 'ceed/fortran.h' - - integer ceed,err,i,j,kk - real*8 a(16), q(16), qlambdaqt(16), lambda(4), sum - - character arg*32 - - a = (/ 0.2, 0.0745355993, -0.0745355993, 0.0333333333,& - & 0.0745355993, 1., 0.1666666667, -0.0745355993,& - & -0.0745355993, 0.1666666667, 1., 0.0745355993,& - & 0.0333333333, -0.0745355993, 0.0745355993, 0.2 /) - - call getarg(1,arg) - - call ceedinit(trim(arg)//char(0),ceed,err) - - do i=0,3 - do j=1,4 - q(4*i+j)=a(4*i+j) - enddo - enddo - - call ceedsymmetricschurdecomposition(ceed,q,lambda,4,err) - -! Check A = Q lambda Q^T - do i=0,3 - do j=0,3 - sum = 0 - do kk=0,3 - sum=sum+q(kk+i*4+1)*lambda(kk+1)*q(kk+j*4+1) - enddo - qlambdaqt(j+i*4+1)=sum - enddo - enddo - do i=0,3 - do j=1,4 - if (abs(a(i*4+j) - qlambdaqt(i*4+j))>1.0D-14) then -! LCOV_EXCL_START - write(*,'(A,I1,A,I1,A,F12.8,A,F12.8)') 'Error: [', & - & i,',',j-1,'] ',a(i*4+j),'!=',qlambdaqt(i*4+j) -! LCOV_EXCL_STOP - endif - enddo - enddo - - call ceeddestroy(ceed,err) - - end -!----------------------------------------------------------------------- diff --git a/tests/t305-basis-f.f90 b/tests/t305-basis-f.f90 deleted file mode 100644 index ab242b5ef7..0000000000 --- a/tests/t305-basis-f.f90 +++ /dev/null @@ -1,98 +0,0 @@ -!----------------------------------------------------------------------- - program test - implicit none - include 'ceed/fortran.h' - - integer ceed,err,i,j,kk - real*8 m(16), k(16), x(16), lambda(4), sum, work(16), val - - character arg*32 - - m = (/ 0.2, 0.0745355993, -0.0745355993, 0.0333333333,& - & 0.0745355993, 1., 0.1666666667, -0.0745355993,& - & -0.0745355993, 0.1666666667, 1., 0.0745355993,& - & 0.0333333333, -0.0745355993, 0.0745355993, 0.2 /) - k = (/ 3.0333333333, -3.4148928136, 0.4982261470, -0.1166666667,& - & -3.4148928136, 5.8333333333, -2.9166666667, 0.4982261470,& - & 0.4982261470, -2.9166666667, 5.8333333333, -3.4148928136,& - & -0.1166666667, 0.4982261470, -3.4148928136, 3.0333333333 /) - - call getarg(1,arg) - - call ceedinit(trim(arg)//char(0),ceed,err) - call ceedsimultaneousdiagonalization(ceed,k,m,x,lambda,4,err) - -! Check X^T M X = I - do i=0,3 - do j=0,3 - sum = 0 - do kk=0,3 - sum=sum+m(kk+i*4+1)*x(j+kk*4+1) - enddo - work(j+i*4+1)=sum - enddo - enddo - do i=0,3 - do j=0,3 - sum = 0 - do kk=0,3 - sum=sum+x(i+kk*4+1)*work(j+kk*4+1) - enddo - m(j+i*4+1)=sum - enddo - enddo - do i=0,3 - do j=1,4 - if (i+1 == j) then - val=1.0 - else - val=0.0 - endif - if (abs(m(i*4+j) - val)>1.0D-13) then -! LCOV_EXCL_START - write(*,'(A,I1,A,I1,A,F12.8,A,F12.8)') 'Error: [', & - & i,',',j,'] ',m(i*4+j),'!=',val -! LCOV_EXCL_STOP - endif - enddo - enddo - -! Check X^T K X = Lambda - do i=0,3 - do j=0,3 - sum = 0 - do kk=0,3 - sum=sum+k(kk+i*4+1)*x(j+kk*4+1) - enddo - work(j+i*4+1)=sum - enddo - enddo - do i=0,3 - do j=0,3 - sum = 0 - do kk=0,3 - sum=sum+x(i+kk*4+1)*work(j+kk*4+1) - enddo - k(j+i*4+1)=sum - enddo - enddo - do i=0,3 - do j=1,4 - if (i+1 == j) then - val=lambda(j) - else - val=0.0 - endif - if (abs(k(i*4+j) - val)>1.0D-13) then -! LCOV_EXCL_START - write(*,'(A,I1,A,I1,A,F12.8,A,F12.8)') 'Error: [', & - & i,',',j,'] ',k(i*4+j),'!=',val -! LCOV_EXCL_STOP - endif - enddo - enddo - - call ceeddestroy(ceed,err) - - end -!----------------------------------------------------------------------- diff --git a/tests/t305-basis.c b/tests/t305-basis.c index 0b79acdf6c..716f5d6479 100644 --- a/tests/t305-basis.c +++ b/tests/t305-basis.c @@ -2,6 +2,7 @@ /// Test Simultaneous Diagonalization /// \test Simultaneous Diagonalization #include +#include #include int main(int argc, char **argv) {