diff --git a/backends/blocked/ceed-blocked-operator.c b/backends/blocked/ceed-blocked-operator.c index 640599e512..dd80e644d9 100644 --- a/backends/blocked/ceed-blocked-operator.c +++ b/backends/blocked/ceed-blocked-operator.c @@ -97,7 +97,7 @@ static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, CeedOperator op, bo case CEED_RESTRICTION_STRIDED: { CeedInt strides[3]; - CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); CeedCallBackend(CeedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_size, strides, &block_rstr[i + start_e])); } break; diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp index 42e2289416..7ffc7bbbcb 100644 --- a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp +++ b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp @@ -387,7 +387,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op) { CeedInt strides[3] = {1, elem_size * num_elem, elem_size}; if (!has_backend_strides) { - CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, strides)); } code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; code << " readDofsStrided" << dim << "dnum_nodes = size; @@ -514,10 +514,22 @@ int CeedElemRestrictionCreate_Cuda(CeedMemType mem_type, CeedCopyMode copy_mode, impl->d_curl_orients = NULL; impl->d_curl_orients_allocated = NULL; CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); - CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + + // Set layouts + { + bool has_backend_strides; + CeedInt layout[3] = {1, size, elem_size}; + + CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + if (rstr_type == CEED_RESTRICTION_STRIDED) { + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) { + CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); + } + } + } // Set up device offset/orientation arrays - CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); if (rstr_type != CEED_RESTRICTION_STRIDED) { switch (mem_type) { case CEED_MEM_HOST: { diff --git a/backends/hip-gen/ceed-hip-gen-operator-build.cpp b/backends/hip-gen/ceed-hip-gen-operator-build.cpp index 235badfc64..0f72174a60 100644 --- a/backends/hip-gen/ceed-hip-gen-operator-build.cpp +++ b/backends/hip-gen/ceed-hip-gen-operator-build.cpp @@ -397,7 +397,7 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op) { CeedInt strides[3] = {1, elem_size * num_elem, elem_size}; if (!has_backend_strides) { - CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, strides)); } code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; code << " readDofsStrided" << dim << "dnum_nodes = size; @@ -513,10 +513,22 @@ int CeedElemRestrictionCreate_Hip(CeedMemType mem_type, CeedCopyMode copy_mode, impl->d_curl_orients = NULL; impl->d_curl_orients_allocated = NULL; CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); - CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + + // Set layouts + { + bool has_backend_strides; + CeedInt layout[3] = {1, size, elem_size}; + + CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + if (rstr_type == CEED_RESTRICTION_STRIDED) { + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) { + CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); + } + } + } // Set up device offset/orientation arrays - CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); if (rstr_type != CEED_RESTRICTION_STRIDED) { switch (mem_type) { case CEED_MEM_HOST: { diff --git a/backends/memcheck/ceed-memcheck-blocked.c b/backends/memcheck/ceed-memcheck-blocked.c index 866f7133ef..e525b77387 100644 --- a/backends/memcheck/ceed-memcheck-blocked.c +++ b/backends/memcheck/ceed-memcheck-blocked.c @@ -24,6 +24,9 @@ static int CeedInit_Memcheck(const char *resource, Ceed ceed) { CeedCallBackend(CeedSetDelegate(ceed, ceed_ref)); CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate", CeedVectorCreate_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate", CeedElemRestrictionCreate_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreateBlocked", CeedElemRestrictionCreate_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreateAtPoints", CeedElemRestrictionCreate_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", CeedQFunctionCreate_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", CeedQFunctionContextCreate_Memcheck)); return CEED_ERROR_SUCCESS; diff --git a/backends/memcheck/ceed-memcheck-restriction.c b/backends/memcheck/ceed-memcheck-restriction.c new file mode 100644 index 0000000000..e79d7be819 --- /dev/null +++ b/backends/memcheck/ceed-memcheck-restriction.c @@ -0,0 +1,776 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#include +#include +#include + +#include "ceed-memcheck.h" + +//------------------------------------------------------------------------------ +// Set backend strides +//------------------------------------------------------------------------------ +static inline int CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr, CeedInt strides[3]) { + CeedInt elem_size, num_comp, num_elem; + + CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); + // Memcheck default, contiguous by component, then node + strides[0] = num_comp; + strides[1] = 1; + strides[2] = num_comp * elem_size; + /** + // CPU default, contiguous by node, then component + strides[0] = 1; + strides[1] = elem_size; + strides[2] = elem_size * num_comp; + + // GPU default, contiguous by node, then element + strides[0] = 1; + strides[1] = num_elem * elem_size; + strides[2] = elem_size; + **/ + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Core ElemRestriction Apply Code +//------------------------------------------------------------------------------ +static inline int CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, + CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, + CeedInt v_offset, const CeedScalar *__restrict__ uu, + CeedScalar *__restrict__ vv) { + // Get strides + bool has_backend_strides; + CeedInt strides[3] = {0}; + + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides)); + else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); + + // Apply restriction + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { + CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; + } + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, + const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, + CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, + CeedScalar *__restrict__ vv) { + // Default restriction with offsets + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { + CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { + vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, + const CeedInt block_size, const CeedInt comp_stride, CeedInt start, + CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, + const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { + // Restriction with orientations + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { + CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { + vv[elem_size * (k * block_size + e * num_comp) + i - v_offset] = + uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, + const CeedInt block_size, const CeedInt comp_stride, CeedInt start, + CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, + const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { + // Restriction with tridiagonal transformation + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { + CeedInt n = 0; + + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + + uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; + } + CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + + uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + + uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; + } + } + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + + uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * + impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core( + CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop, + CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { + // Restriction with (unsigned) tridiagonal transformation + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { + CeedInt n = 0; + + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + + uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); + } + CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + + uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + + uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); + } + } + CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { + vv[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] = + uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + + uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * + abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, + CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, + CeedInt v_offset, const CeedScalar *__restrict__ uu, + CeedScalar *__restrict__ vv) { + // Get strides + bool has_backend_strides; + CeedInt strides[3] = {0}; + + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides)); + else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); + + // Apply restriction + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { + CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { + CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { + vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += + uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; + } + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, + const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, + CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, + CeedScalar *__restrict__ vv) { + // Default restriction with offsets + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + for (CeedInt k = 0; k < num_comp; k++) { + for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { + // Iteration bound set to discard padding elements + for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { + CeedScalar vv_loc; + + vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; + CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; + } + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, + const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, + CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, + CeedScalar *__restrict__ vv) { + // Restriction with orientations + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + for (CeedInt k = 0; k < num_comp; k++) { + for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { + // Iteration bound set to discard padding elements + for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { + CeedScalar vv_loc; + + vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); + CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; + } + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, + const CeedInt block_size, const CeedInt comp_stride, CeedInt start, + CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, + const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { + // Restriction with tridiagonal transformation + CeedElemRestriction_Memcheck *impl; + CeedScalar vv_loc[block_size]; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + for (CeedInt k = 0; k < num_comp; k++) { + // Iteration bound set to discard padding elements + const CeedInt block_end = CeedIntMin(block_size, num_elem - e); + CeedInt n = 0; + + CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { + vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + + uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; + } + for (CeedInt j = 0; j < block_end; j++) { + CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; + } + for (n = 1; n < elem_size - 1; n++) { + CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { + vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + + uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + + uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; + } + for (CeedInt j = 0; j < block_end; j++) { + CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; + } + } + CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { + vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + + uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * + impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; + } + for (CeedInt j = 0; j < block_end; j++) { + CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core( + CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop, + CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { + // Restriction with (unsigned) tridiagonal transformation + CeedElemRestriction_Memcheck *impl; + CeedScalar vv_loc[block_size]; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { + for (CeedInt k = 0; k < num_comp; k++) { + // Iteration bound set to discard padding elements + const CeedInt block_end = CeedIntMin(block_size, num_elem - e); + CeedInt n = 0; + + CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { + vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + + uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); + } + for (CeedInt j = 0; j < block_end; j++) { + CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; + } + for (n = 1; n < elem_size - 1; n++) { + CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { + vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + + uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + + uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); + } + for (CeedInt j = 0; j < block_end; j++) { + CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; + } + } + CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { + vv_loc[j] = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + + uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * + abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); + } + for (CeedInt j = 0; j < block_end; j++) { + CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; + } + } + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, + CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, + CeedScalar *__restrict__ vv) { + CeedInt num_points, l_vec_offset, e_vec_offset = 0; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + for (CeedInt e = start; e < stop; e++) { + l_vec_offset = impl->offsets[e]; + CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); + if (t_mode == CEED_NOTRANSPOSE) { + for (CeedInt i = 0; i < num_points; i++) { + for (CeedInt j = 0; j < num_comp; j++) vv[j * num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; + } + } else { + for (CeedInt i = 0; i < num_points; i++) { + for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * num_points + i + e_vec_offset]; + } + } + e_vec_offset += num_points * num_comp; + } + return CEED_ERROR_SUCCESS; +} + +static inline int CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, + const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, + bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { + CeedInt num_elem, elem_size, v_offset; + CeedRestrictionType rstr_type; + const CeedScalar *uu; + CeedScalar *vv; + + CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); + CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); + v_offset = start * block_size * elem_size * num_comp; + CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); + CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); + + if (t_mode == CEED_TRANSPOSE) { + // Sum into for transpose mode, E-vector to L-vector + CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); + } else { + // Overwrite for notranspose mode, L-vector to E-vector + CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); + } + + if (t_mode == CEED_TRANSPOSE) { + // Restriction from E-vector to L-vector + // Performing v += r^T * u + // uu has shape [elem_size, num_comp, num_elem], row-major + // vv has shape [nnodes, num_comp] + // Sum into for transpose mode + switch (rstr_type) { + case CEED_RESTRICTION_STRIDED: + CeedCallBackend( + CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); + break; + case CEED_RESTRICTION_STANDARD: + CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + break; + case CEED_RESTRICTION_ORIENTED: + if (use_signs) { + CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } else { + CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } + break; + case CEED_RESTRICTION_CURL_ORIENTED: + if (use_signs && use_orients) { + CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } else if (use_orients) { + CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, + num_elem, elem_size, v_offset, uu, vv)); + } else { + CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } + break; + case CEED_RESTRICTION_POINTS: + CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); + break; + } + } else { + // Restriction from L-vector to E-vector + // Perform: v = r * u + // vv has shape [elem_size, num_comp, num_elem], row-major + // uu has shape [nnodes, num_comp] + // Overwrite for notranspose mode + switch (rstr_type) { + case CEED_RESTRICTION_STRIDED: + CeedCallBackend( + CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); + break; + case CEED_RESTRICTION_STANDARD: + CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + break; + case CEED_RESTRICTION_ORIENTED: + if (use_signs) { + CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } else { + CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } + break; + case CEED_RESTRICTION_CURL_ORIENTED: + if (use_signs && use_orients) { + CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, + num_elem, elem_size, v_offset, uu, vv)); + } else if (use_orients) { + CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, + num_elem, elem_size, v_offset, uu, vv)); + } else { + CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, + elem_size, v_offset, uu, vv)); + } + break; + case CEED_RESTRICTION_POINTS: + CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); + break; + } + } + CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); + CeedCallBackend(CeedVectorRestoreArray(v, &vv)); + if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Apply +//------------------------------------------------------------------------------ +static int CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { + CeedInt num_block, block_size, num_comp, comp_stride; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); + CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Apply Unsigned +//------------------------------------------------------------------------------ +static int CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, + CeedRequest *request) { + CeedInt num_block, block_size, num_comp, comp_stride; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); + CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Apply Unoriented +//------------------------------------------------------------------------------ +static int CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, + CeedRequest *request) { + CeedInt num_block, block_size, num_comp, comp_stride; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); + CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Apply Points +//------------------------------------------------------------------------------ +static int CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, + CeedVector v, CeedRequest *request) { + CeedInt num_comp; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); +} + +//------------------------------------------------------------------------------ +// ElemRestriction Apply Block +//------------------------------------------------------------------------------ +static int CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, + CeedRequest *request) { + CeedInt block_size, num_comp, comp_stride; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Get Offsets +//------------------------------------------------------------------------------ +static int CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { + Ceed ceed; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); + + CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); + + *offsets = impl->offsets; + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Get Orientations +//------------------------------------------------------------------------------ +static int CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { + Ceed ceed; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); + + CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); + + *orients = impl->orients; + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Get Curl-Conforming Orientations +//------------------------------------------------------------------------------ +static int CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { + Ceed ceed; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); + + CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); + + *curl_orients = impl->curl_orients; + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Destroy +//------------------------------------------------------------------------------ +static int CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr) { + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); + CeedCallBackend(CeedFree(&impl->offsets_allocated)); + CeedCallBackend(CeedFree(&impl->orients_allocated)); + CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); + CeedCallBackend(CeedFree(&impl)); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// ElemRestriction Create +//------------------------------------------------------------------------------ +int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, + const CeedInt8 *curl_orients, CeedElemRestriction rstr) { + Ceed ceed; + CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; + CeedRestrictionType rstr_type; + CeedElemRestriction_Memcheck *impl; + + CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); + CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); + CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); + CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); + CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); + CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); + CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); + CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); + + CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); + + CeedCallBackend(CeedCalloc(1, &impl)); + CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); + + // Set layouts + { + bool has_backend_strides; + CeedInt e_layout[3] = {1, elem_size, elem_size * num_comp}, l_layout[3] = {0}; + + CeedCallBackend(CeedElemRestrictionSetELayout(rstr, e_layout)); + if (rstr_type == CEED_RESTRICTION_STRIDED) { + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) { + CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, l_layout)); + CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, l_layout)); + } + } + } + + // Offsets data + if (rstr_type != CEED_RESTRICTION_STRIDED) { + const char *resource; + + // Check indices for ref or memcheck backends + { + Ceed current = ceed, parent = NULL; + + CeedCallBackend(CeedGetParent(current, &parent)); + while (current != parent) { + current = parent; + CeedCallBackend(CeedGetParent(current, &parent)); + } + CeedCallBackend(CeedGetResource(parent, &resource)); + } + if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || + !strcmp(resource, "/cpu/self/memcheck/blocked")) { + CeedSize l_size; + + CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); + for (CeedInt i = 0; i < num_elem * elem_size; i++) { + CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, + "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); + } + } + + // Copy data + if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); + num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); + switch (copy_mode) { + case CEED_COPY_VALUES: + CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); + memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); + impl->offsets = impl->offsets_allocated; + break; + case CEED_OWN_POINTER: + impl->offsets_allocated = (CeedInt *)offsets; + impl->offsets = impl->offsets_allocated; + break; + case CEED_USE_POINTER: + impl->offsets = offsets; + } + + // Orientation data + if (rstr_type == CEED_RESTRICTION_ORIENTED) { + CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); + switch (copy_mode) { + case CEED_COPY_VALUES: + CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); + memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); + impl->orients = impl->orients_allocated; + break; + case CEED_OWN_POINTER: + impl->orients_allocated = (bool *)orients; + impl->orients = impl->orients_allocated; + break; + case CEED_USE_POINTER: + impl->orients = orients; + } + } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { + CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); + switch (copy_mode) { + case CEED_COPY_VALUES: + CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); + memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); + impl->curl_orients = impl->curl_orients_allocated; + break; + case CEED_OWN_POINTER: + impl->curl_orients_allocated = (CeedInt8 *)curl_orients; + impl->curl_orients = impl->curl_orients_allocated; + break; + case CEED_USE_POINTER: + impl->curl_orients = curl_orients; + } + } + } + + // Set apply function + impl->Apply = CeedElemRestrictionApply_Memcheck_Core; + + // Register backend functions + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Memcheck)); + if (rstr_type == CEED_RESTRICTION_POINTS) { + CeedCallBackend( + CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Memcheck)); + } + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Memcheck)); + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ diff --git a/backends/memcheck/ceed-memcheck-serial.c b/backends/memcheck/ceed-memcheck-serial.c index 94233fa9cf..7dde1f0544 100644 --- a/backends/memcheck/ceed-memcheck-serial.c +++ b/backends/memcheck/ceed-memcheck-serial.c @@ -25,6 +25,9 @@ static int CeedInit_Memcheck(const char *resource, Ceed ceed) { CeedCallBackend(CeedSetDelegate(ceed, ceed_ref)); CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate", CeedVectorCreate_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate", CeedElemRestrictionCreate_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreateBlocked", CeedElemRestrictionCreate_Memcheck)); + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreateAtPoints", CeedElemRestrictionCreate_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", CeedQFunctionCreate_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", CeedQFunctionContextCreate_Memcheck)); return CEED_ERROR_SUCCESS; diff --git a/backends/memcheck/ceed-memcheck-vector.c b/backends/memcheck/ceed-memcheck-vector.c index f38b1ff6fb..7305be2640 100644 --- a/backends/memcheck/ceed-memcheck-vector.c +++ b/backends/memcheck/ceed-memcheck-vector.c @@ -229,6 +229,9 @@ int CeedVectorCreate_Memcheck(CeedSize n, CeedVector vec) { Ceed ceed; CeedVector_Memcheck *impl; + CeedCallBackend(CeedCalloc(1, &impl)); + CeedCallBackend(CeedVectorSetData(vec, impl)); + CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Memcheck)); @@ -240,8 +243,6 @@ int CeedVectorCreate_Memcheck(CeedSize n, CeedVector vec) { CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArray", CeedVectorRestoreArray_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "RestoreArrayRead", CeedVectorRestoreArrayRead_Memcheck)); CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Memcheck)); - CeedCallBackend(CeedCalloc(1, &impl)); - CeedCallBackend(CeedVectorSetData(vec, impl)); return CEED_ERROR_SUCCESS; } diff --git a/backends/memcheck/ceed-memcheck.h b/backends/memcheck/ceed-memcheck.h index ca451e6044..97198be5cd 100644 --- a/backends/memcheck/ceed-memcheck.h +++ b/backends/memcheck/ceed-memcheck.h @@ -21,6 +21,17 @@ typedef struct { CeedScalar *array_read_only_copy; } CeedVector_Memcheck; +typedef struct { + const CeedInt *offsets; + CeedInt *offsets_allocated; + const bool *orients; /* Orientation, if it exists, is true when the dof must be flipped */ + bool *orients_allocated; + const CeedInt8 *curl_orients; /* Tridiagonal matrix (row-major) for a general transformation during restriction */ + CeedInt8 *curl_orients_allocated; + int (*Apply)(CeedElemRestriction, CeedInt, CeedInt, CeedInt, CeedInt, CeedInt, CeedTransposeMode, bool, bool, CeedVector, CeedVector, + CeedRequest *); +} CeedElemRestriction_Memcheck; + typedef struct { const CeedScalar **inputs; CeedScalar **outputs; @@ -38,6 +49,9 @@ typedef struct { CEED_INTERN int CeedVectorCreate_Memcheck(CeedSize n, CeedVector vec); +CEED_INTERN int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, + const CeedInt8 *curl_orients, CeedElemRestriction r); + CEED_INTERN int CeedQFunctionCreate_Memcheck(CeedQFunction qf); CEED_INTERN int CeedQFunctionContextCreate_Memcheck(CeedQFunctionContext ctx); diff --git a/backends/occa/ceed-occa-elem-restriction.cpp b/backends/occa/ceed-occa-elem-restriction.cpp index 76ad609c24..ea5b9b23f2 100644 --- a/backends/occa/ceed-occa-elem-restriction.cpp +++ b/backends/occa/ceed-occa-elem-restriction.cpp @@ -244,7 +244,7 @@ ElemRestriction *ElemRestriction::setupFrom(CeedElemRestriction r) { if (ceedStrideType == USER_STRIDES) { CeedInt strides[3]; - CeedCallOcca(CeedElemRestrictionGetStrides(r, &strides)); + CeedCallOcca(CeedElemRestrictionGetStrides(r, strides)); ceedNodeStride = strides[0]; ceedComponentStride = strides[1]; diff --git a/backends/opt/ceed-opt-operator.c b/backends/opt/ceed-opt-operator.c index 2bfca152f1..0369954a47 100644 --- a/backends/opt/ceed-opt-operator.c +++ b/backends/opt/ceed-opt-operator.c @@ -97,7 +97,7 @@ static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op, bool i case CEED_RESTRICTION_STRIDED: { CeedInt strides[3]; - CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); CeedCallBackend(CeedElemRestrictionCreateBlockedStrided(ceed_rstr, num_elem, elem_size, block_size, num_comp, l_size, strides, &block_rstr[i + start_e])); } break; diff --git a/backends/ref/ceed-ref-restriction.c b/backends/ref/ceed-ref-restriction.c index 44e9081eff..dbf290929c 100644 --- a/backends/ref/ceed-ref-restriction.c +++ b/backends/ref/ceed-ref-restriction.c @@ -41,7 +41,7 @@ static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRe // User provided strides CeedInt strides[3]; - CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { @@ -202,7 +202,7 @@ static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRest // User provided strides CeedInt strides[3]; - CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { @@ -736,16 +736,28 @@ int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); - CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; + CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); CeedCallBackend(CeedCalloc(1, &impl)); CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); - CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + + // Set layouts + { + bool has_backend_strides; + CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; + + CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + if (rstr_type == CEED_RESTRICTION_STRIDED) { + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) { + CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); + } + } + } // Offsets data - CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); if (rstr_type != CEED_RESTRICTION_STRIDED) { const char *resource; diff --git a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp index a3f9c86d1c..efacc0bc2d 100644 --- a/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp +++ b/backends/sycl-gen/ceed-sycl-gen-operator-build.sycl.cpp @@ -426,7 +426,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedInt strides[3] = {1, elem_size * num_elem, elem_size}; if (!has_backend_strides) { - CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, strides)); } code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; code << " readDofsStrided" << dim << "d(num_comp_in_" << i << ",P_in_" << i << "," << strides[0] << "," << strides[1] << "," << strides[2] @@ -539,7 +539,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedInt strides[3] = {1, elem_size * num_elem, elem_size}; if (!has_backend_strides) { - CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, strides)); } code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; code << " readSliceQuadsStrided" @@ -731,7 +731,7 @@ extern "C" int CeedOperatorBuildKernel_Sycl_gen(CeedOperator op) { CeedInt strides[3] = {1, elem_size * num_elem, elem_size}; if (!has_backend_strides) { - CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(elem_rstr, strides)); } code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; code << " writeDofsStrided" << dim << "d(num_comp_out_" << i << ",P_out_" << i << "," << strides[0] << "," << strides[1] << "," << strides[2] diff --git a/backends/sycl-ref/ceed-sycl-restriction.sycl.cpp b/backends/sycl-ref/ceed-sycl-restriction.sycl.cpp index 8aac06786f..0fa5170d11 100644 --- a/backends/sycl-ref/ceed-sycl-restriction.sycl.cpp +++ b/backends/sycl-ref/ceed-sycl-restriction.sycl.cpp @@ -345,7 +345,6 @@ int CeedElemRestrictionCreate_Sycl(CeedMemType mem_type, CeedCopyMode copy_mode, CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); const CeedInt size = num_elem * elem_size; CeedInt strides[3] = {1, size, elem_size}; - CeedInt layout[3] = {1, elem_size * num_elem, elem_size}; CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); CeedCheck(rstr_type != CEED_RESTRICTION_ORIENTED && rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_BACKEND, @@ -358,7 +357,7 @@ int CeedElemRestrictionCreate_Sycl(CeedMemType mem_type, CeedCopyMode copy_mode, CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); if (!has_backend_strides) { - CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); + CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); } } else { CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); @@ -380,7 +379,20 @@ int CeedElemRestrictionCreate_Sycl(CeedMemType mem_type, CeedCopyMode copy_mode, impl->strides[1] = strides[1]; impl->strides[2] = strides[2]; CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); - CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + + // Set layouts + { + bool has_backend_strides; + CeedInt layout[3] = {1, size, elem_size}; + + CeedCallBackend(CeedElemRestrictionSetELayout(rstr, layout)); + if (rstr_type == CEED_RESTRICTION_STRIDED) { + CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) { + CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, layout)); + } + } + } // Set up device indices/offset arrays if (mem_type == CEED_MEM_HOST) { diff --git a/doc/sphinx/source/releasenotes.md b/doc/sphinx/source/releasenotes.md index 9815589b94..00e12980e0 100644 --- a/doc/sphinx/source/releasenotes.md +++ b/doc/sphinx/source/releasenotes.md @@ -10,10 +10,12 @@ On this page we provide a summary of the main API changes, new features and exam - Add `bool` field type for `CeedQFunctionContext` and related interfaces to use `bool` fields. - `CEED_BASIS_COLLOCATED` removed; users should only use `CEED_BASIS_NONE`. +- Remove unneeded pointer for `CeedElemRestrictionGetELayout`. ### New features - Add `CeedOperatorCreateAtPoints` which evaluates the `CeedQFunction` at arbitrary locations in each element, for use in Particle in Cell, Material Point Method, and similar methods. +- Add `CeedElemRestrictionGetLLayout` to provide L-vector layout for strided `CeedElemRestriction` created with `CEED_BACKEND_STRIDES`. ### Examples diff --git a/include/ceed-impl.h b/include/ceed-impl.h index f0ee6bf633..f244b6c8db 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -173,7 +173,8 @@ struct CeedElemRestriction_private { CeedInt block_size; /* number of elements in a batch */ CeedInt num_block; /* number of blocks of elements */ CeedInt *strides; /* strides between [nodes, components, elements] */ - CeedInt layout[3]; /* E-vector layout [nodes, components, elements] */ + CeedInt l_layout[3]; /* L-vector layout [nodes, components, elements] */ + CeedInt e_layout[3]; /* E-vector layout [nodes, components, elements] */ CeedRestrictionType rstr_type; /* initialized in element restriction constructor for default, oriented, curl-oriented, or strided element restriction */ uint64_t num_readers; /* number of instances of offset read only access */ diff --git a/include/ceed/backend.h b/include/ceed/backend.h index c0a0d70a0f..39660cd93e 100644 --- a/include/ceed/backend.h +++ b/include/ceed/backend.h @@ -271,7 +271,7 @@ CEED_EXTERN int CeedElemRestrictionGetType(CeedElemRestriction rstr, CeedRestric CEED_EXTERN int CeedElemRestrictionIsStrided(CeedElemRestriction rstr, bool *is_strided); CEED_EXTERN int CeedElemRestrictionIsPoints(CeedElemRestriction rstr, bool *is_points); CEED_EXTERN int CeedElemRestrictionAtPointsAreCompatible(CeedElemRestriction rstr_a, CeedElemRestriction rstr_b, bool *are_compatible); -CEED_EXTERN int CeedElemRestrictionGetStrides(CeedElemRestriction rstr, CeedInt (*strides)[3]); +CEED_EXTERN int CeedElemRestrictionGetStrides(CeedElemRestriction rstr, CeedInt strides[3]); CEED_EXTERN int CeedElemRestrictionHasBackendStrides(CeedElemRestriction rstr, bool *has_backend_strides); CEED_EXTERN int CeedElemRestrictionGetOffsets(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets); CEED_EXTERN int CeedElemRestrictionRestoreOffsets(CeedElemRestriction rstr, const CeedInt **offsets); @@ -279,7 +279,9 @@ CEED_EXTERN int CeedElemRestrictionGetOrientations(CeedElemRestriction rstr, Cee CEED_EXTERN int CeedElemRestrictionRestoreOrientations(CeedElemRestriction rstr, const bool **orients); CEED_EXTERN int CeedElemRestrictionGetCurlOrientations(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients); CEED_EXTERN int CeedElemRestrictionRestoreCurlOrientations(CeedElemRestriction rstr, const CeedInt8 **curl_orients); -CEED_EXTERN int CeedElemRestrictionGetELayout(CeedElemRestriction rstr, CeedInt (*layout)[3]); +CEED_EXTERN int CeedElemRestrictionGetLLayout(CeedElemRestriction rstr, CeedInt layout[3]); +CEED_EXTERN int CeedElemRestrictionSetLLayout(CeedElemRestriction rstr, CeedInt layout[3]); +CEED_EXTERN int CeedElemRestrictionGetELayout(CeedElemRestriction rstr, CeedInt layout[3]); CEED_EXTERN int CeedElemRestrictionSetELayout(CeedElemRestriction rstr, CeedInt layout[3]); CEED_EXTERN int CeedElemRestrictionGetData(CeedElemRestriction rstr, void *data); CEED_EXTERN int CeedElemRestrictionSetData(CeedElemRestriction rstr, void *data); diff --git a/interface/ceed-elemrestriction.c b/interface/ceed-elemrestriction.c index aec1016476..9752377b6f 100644 --- a/interface/ceed-elemrestriction.c +++ b/interface/ceed-elemrestriction.c @@ -200,9 +200,9 @@ int CeedElemRestrictionAtPointsAreCompatible(CeedElemRestriction rstr_a, CeedEle @ref Backend **/ -int CeedElemRestrictionGetStrides(CeedElemRestriction rstr, CeedInt (*strides)[3]) { +int CeedElemRestrictionGetStrides(CeedElemRestriction rstr, CeedInt strides[3]) { CeedCheck(rstr->strides, rstr->ceed, CEED_ERROR_MINOR, "CeedElemRestriction has no stride data"); - for (CeedInt i = 0; i < 3; i++) (*strides)[i] = rstr->strides[i]; + for (CeedInt i = 0; i < 3; i++) strides[i] = rstr->strides[i]; return CEED_ERROR_SUCCESS; } @@ -336,6 +336,55 @@ int CeedElemRestrictionRestoreCurlOrientations(CeedElemRestriction rstr, const C return CEED_ERROR_SUCCESS; } +/** + + @brief Get the L-vector layout of a strided `CeedElemRestriction` + + @param[in] rstr `CeedElemRestriction` + @param[out] layout Variable to store layout array, stored as `[nodes, components, elements]`. + The data for node `i`, component `j`, element `k` in the E-vector is given by `i*layout[0] + j*layout[1] + k*layout[2]`. + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedElemRestrictionGetLLayout(CeedElemRestriction rstr, CeedInt layout[3]) { + bool has_backend_strides; + CeedRestrictionType rstr_type; + + CeedCall(CeedElemRestrictionGetType(rstr, &rstr_type)); + CeedCheck(rstr_type == CEED_RESTRICTION_STRIDED, rstr->ceed, CEED_ERROR_MINOR, "Only strided CeedElemRestriction have strided L-vector layout"); + CeedCall(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); + if (has_backend_strides) { + CeedCheck(rstr->l_layout[0], rstr->ceed, CEED_ERROR_MINOR, "CeedElemRestriction has no L-vector layout data"); + for (CeedInt i = 0; i < 3; i++) layout[i] = rstr->l_layout[i]; + } else { + CeedCall(CeedElemRestrictionGetStrides(rstr, layout)); + } + return CEED_ERROR_SUCCESS; +} + +/** + + @brief Set the L-vector layout of a strided `CeedElemRestriction` + + @param[in] rstr `CeedElemRestriction` + @param[in] layout Variable to containing layout array, stored as `[nodes, components, elements]`. + The data for node `i`, component `j`, element `k` in the E-vector is given by `i*layout[0] + j*layout[1] + k*layout[2]`. + + @return An error code: 0 - success, otherwise - failure + + @ref Backend +**/ +int CeedElemRestrictionSetLLayout(CeedElemRestriction rstr, CeedInt layout[3]) { + CeedRestrictionType rstr_type; + + CeedCall(CeedElemRestrictionGetType(rstr, &rstr_type)); + CeedCheck(rstr_type == CEED_RESTRICTION_STRIDED, rstr->ceed, CEED_ERROR_MINOR, "Only strided CeedElemRestriction have strided L-vector layout"); + for (CeedInt i = 0; i < 3; i++) rstr->l_layout[i] = layout[i]; + return CEED_ERROR_SUCCESS; +} + /** @brief Get the E-vector layout of a `CeedElemRestriction` @@ -348,9 +397,9 @@ int CeedElemRestrictionRestoreCurlOrientations(CeedElemRestriction rstr, const C @ref Backend **/ -int CeedElemRestrictionGetELayout(CeedElemRestriction rstr, CeedInt (*layout)[3]) { - CeedCheck(rstr->layout[0], rstr->ceed, CEED_ERROR_MINOR, "CeedElemRestriction has no layout data"); - for (CeedInt i = 0; i < 3; i++) (*layout)[i] = rstr->layout[i]; +int CeedElemRestrictionGetELayout(CeedElemRestriction rstr, CeedInt layout[3]) { + CeedCheck(rstr->e_layout[0], rstr->ceed, CEED_ERROR_MINOR, "CeedElemRestriction has no E-vector layout data"); + for (CeedInt i = 0; i < 3; i++) layout[i] = rstr->e_layout[i]; return CEED_ERROR_SUCCESS; } @@ -367,7 +416,7 @@ int CeedElemRestrictionGetELayout(CeedElemRestriction rstr, CeedInt (*layout)[3] @ref Backend **/ int CeedElemRestrictionSetELayout(CeedElemRestriction rstr, CeedInt layout[3]) { - for (CeedInt i = 0; i < 3; i++) rstr->layout[i] = layout[i]; + for (CeedInt i = 0; i < 3; i++) rstr->e_layout[i] = layout[i]; return CEED_ERROR_SUCCESS; } @@ -664,6 +713,8 @@ int CeedElemRestrictionCreateCurlOriented(Ceed ceed, CeedInt num_elem, CeedInt e @param[in] strides Array for strides between `[nodes, components, elements]`. Data for node `i`, component `j`, element `k` can be found in the L-vector at index `i*strides[0] + j*strides[1] + k*strides[2]`. @ref CEED_STRIDES_BACKEND may be used for `CeedVector` ordered by the same `Ceed` backend. + `CEED_STRIDES_BACKEND` should only be used pass data between `CeedOperator` created with the same `Ceed` backend. + The L-vector layout will, in general, be different between `Ceed` backends. @param[out] rstr Address of the variable where the newly created `CeedElemRestriction` will be stored @return An error code: 0 - success, otherwise - failure diff --git a/interface/ceed-fortran.c b/interface/ceed-fortran.c index af45edbb69..a0c0cf214b 100644 --- a/interface/ceed-fortran.c +++ b/interface/ceed-fortran.c @@ -445,7 +445,7 @@ CEED_EXTERN void fCeedElemRestrictionGetMultiplicity(int *elemr, int *mult, int #define fCeedElemRestrictionGetELayout FORTRAN_NAME(ceedelemrestrictiongetelayout, CEEDELEMRESTRICTIONGETELAYOUT) CEED_EXTERN void fCeedElemRestrictionGetELayout(int *elemr, int *layout, int *err) { CeedInt layout_c[3]; - *err = CeedElemRestrictionGetELayout(CeedElemRestriction_dict[*elemr], &layout_c); + *err = CeedElemRestrictionGetELayout(CeedElemRestriction_dict[*elemr], layout_c); for (int i = 0; i < 3; i++) layout[i] = layout_c[i]; } diff --git a/interface/ceed-preconditioning.c b/interface/ceed-preconditioning.c index bd611e81bd..7a0029779c 100644 --- a/interface/ceed-preconditioning.c +++ b/interface/ceed-preconditioning.c @@ -166,7 +166,7 @@ static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, Ce CeedElemRestriction assembled_elem_rstr = NULL; CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request)); - CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); + CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf)); CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); @@ -388,7 +388,7 @@ static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, C CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in)); CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in)); CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in)); - CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in)); + CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in)); // Determine elem_dof relation for input CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in)); @@ -409,7 +409,7 @@ static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, C "Active input and output operator restrictions must have the same number of elements"); CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out)); CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out)); - CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out)); + CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out)); // Determine elem_dof relation for output CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out)); @@ -514,7 +514,7 @@ static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVecto CeedElemRestriction assembled_elem_rstr = NULL; CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE)); - CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf)); + CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf)); CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr)); CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array)); @@ -2648,7 +2648,7 @@ int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, // Assemble QFunction CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request)); - CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout)); + CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout)); CeedCall(CeedElemRestrictionDestroy(&rstr_qf)); CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm)); diff --git a/python/build_ceed_cffi.py b/python/build_ceed_cffi.py index fc5bfbaf25..772a6c753f 100644 --- a/python/build_ceed_cffi.py +++ b/python/build_ceed_cffi.py @@ -62,7 +62,8 @@ def get_ceed_dirs(): header = '\n'.join(lines) header = header.split("static inline CeedInt CeedIntPow", 1)[0] header += '\nextern int CeedVectorGetState(CeedVector, uint64_t*);' -header += '\nextern int CeedElemRestrictionGetELayout(CeedElemRestriction, CeedInt (*layout)[3]);' +header += '\nextern int CeedElemRestrictionGetLLayout(CeedElemRestriction, CeedInt layout[3]);' +header += '\nextern int CeedElemRestrictionGetELayout(CeedElemRestriction, CeedInt layout[3]);' # Note: cffi cannot handle vargs header = re.sub("va_list", "const char *", header) diff --git a/python/ceed_elemrestriction.py b/python/ceed_elemrestriction.py index c881b5e803..ee6d05bf55 100644 --- a/python/ceed_elemrestriction.py +++ b/python/ceed_elemrestriction.py @@ -122,8 +122,30 @@ def get_multiplicity(self): # Return return mult - # Get ElemRestrition Layout - def get_layout(self): + # Get ElemRestrition L-vector Layout + def get_l_layout(self): + """Get the local vector layout of an ElemRestriction. + + Returns: + layout: Vector containing layout array, stored as [nodes, components, elements]. + The data for node i, component j, element k in the element + vector is given by i*layout[0] + j*layout[1] + k*layout[2].""" + + # Create output array + layout = np.zeros(3, dtype="int32") + layout_pointer = ffi.cast("const CeedInt *", + layout.__array_interface__['data'][0]) + + # libCEED call + err_code = lib.CeedElemRestrictionGetLLayout( + self._pointer[0], layout_pointer) + self._ceed._check_error(err_code) + + # Return + return layout + + # Get ElemRestrition E-vector Layout + def get_e_layout(self): """Get the element vector layout of an ElemRestriction. Returns: @@ -133,13 +155,12 @@ def get_layout(self): # Create output array layout = np.zeros(3, dtype="int32") - array_pointer = ffi.cast( - "CeedInt (*)[3]", - layout.__array_interface__['data'][0]) + layout_pointer = ffi.cast("const CeedInt *", + layout.__array_interface__['data'][0]) # libCEED call err_code = lib.CeedElemRestrictionGetELayout( - self._pointer[0], array_pointer) + self._pointer[0], layout_pointer) self._ceed._check_error(err_code) # Return diff --git a/python/tests/test-2-elemrestriction.py b/python/tests/test-2-elemrestriction.py index 0d12b7e126..230b839d8a 100644 --- a/python/tests/test-2-elemrestriction.py +++ b/python/tests/test-2-elemrestriction.py @@ -98,7 +98,7 @@ def test_202(ceed_resource, capsys): # NoTranspose r.apply(x, y) - layout = r.get_layout() + layout = r.get_e_layout() with y.array_read() as y_array: for i in range(elem_size): for j in range(1): @@ -146,7 +146,7 @@ def test_208(ceed_resource): # NoTranspose r.apply_block(1, x, y) - layout = r.get_layout() + layout = r.get_e_layout() with y.array_read() as y_array: for i in range(elem_size): for j in range(1): diff --git a/tests/t201-elemrestriction.c b/tests/t201-elemrestriction.c index 32d2d09afa..d132e90b76 100644 --- a/tests/t201-elemrestriction.c +++ b/tests/t201-elemrestriction.c @@ -28,7 +28,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < 2; i++) { // Node for (CeedInt j = 0; j < 1; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element diff --git a/tests/t202-elemrestriction.c b/tests/t202-elemrestriction.c index 26b3889dc0..354f651cca 100644 --- a/tests/t202-elemrestriction.c +++ b/tests/t202-elemrestriction.c @@ -36,7 +36,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < elem_size; i++) { // Node for (CeedInt j = 0; j < 1; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element diff --git a/tests/t203-elemrestriction.c b/tests/t203-elemrestriction.c index 892964a49a..39d17aee9e 100644 --- a/tests/t203-elemrestriction.c +++ b/tests/t203-elemrestriction.c @@ -42,7 +42,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < elem_size; i++) { // Node for (CeedInt j = 0; j < num_comp; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element diff --git a/tests/t204-elemrestriction.c b/tests/t204-elemrestriction.c index d8044e1f1e..8c833e5a58 100644 --- a/tests/t204-elemrestriction.c +++ b/tests/t204-elemrestriction.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < 2; i++) { // Node for (CeedInt j = 0; j < 2; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element diff --git a/tests/t205-elemrestriction.c b/tests/t205-elemrestriction.c index 41ebee6b4d..f18be4d846 100644 --- a/tests/t205-elemrestriction.c +++ b/tests/t205-elemrestriction.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < 2; i++) { // Node for (CeedInt j = 0; j < 2; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element diff --git a/tests/t206-elemrestriction.c b/tests/t206-elemrestriction.c index 9f2393ba21..5ba2df3187 100644 --- a/tests/t206-elemrestriction.c +++ b/tests/t206-elemrestriction.c @@ -28,7 +28,7 @@ int main(int argc, char **argv) { CeedElemRestrictionCreate(ceed, num_elem, 2, 2, num_elem + 1, 2 * (num_elem + 1), CEED_MEM_HOST, CEED_USE_POINTER, ind, &elem_restriction); // Set x data in backend E-layout - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); { CeedScalar x_array[2 * (num_elem * 2)]; diff --git a/tests/t207-elemrestriction.c b/tests/t207-elemrestriction.c index fe99d5d4a7..5b7c0c21e3 100644 --- a/tests/t207-elemrestriction.c +++ b/tests/t207-elemrestriction.c @@ -28,7 +28,7 @@ int main(int argc, char **argv) { CeedElemRestrictionCreate(ceed, num_elem, 2, 2, 1, 2 * (num_elem + 1), CEED_MEM_HOST, CEED_USE_POINTER, ind, &elem_restriction); // Set x data in backend E-layout - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); { CeedScalar x_array[2 * (num_elem * 2)]; diff --git a/tests/t208-elemrestriction.c b/tests/t208-elemrestriction.c index 4e7fbe2dbc..4a8cb7a4cb 100644 --- a/tests/t208-elemrestriction.c +++ b/tests/t208-elemrestriction.c @@ -35,7 +35,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < elem_size; i++) { // Node for (CeedInt j = 0; j < 1; j++) { // Component for (CeedInt k = blk_size; k < num_elem; k++) { // Element diff --git a/tests/t213-elemrestriction.c b/tests/t213-elemrestriction.c index 7f94035d0b..40ee380f47 100644 --- a/tests/t213-elemrestriction.c +++ b/tests/t213-elemrestriction.c @@ -46,7 +46,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < elem_size; i++) { // Node for (CeedInt j = 0; j < num_comp; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element diff --git a/tests/t219-elemrestriction.c b/tests/t219-elemrestriction.c index 7912ca1f41..3ffcd69c48 100644 --- a/tests/t219-elemrestriction.c +++ b/tests/t219-elemrestriction.c @@ -51,7 +51,7 @@ int main(int argc, char **argv) { const CeedScalar *y_array; CeedVectorGetArrayRead(y, CEED_MEM_HOST, &y_array); - CeedElemRestrictionGetELayout(elem_restriction, &layout); + CeedElemRestrictionGetELayout(elem_restriction, layout); for (CeedInt i = 0; i < elem_size; i++) { // Node for (CeedInt j = 0; j < 1; j++) { // Component for (CeedInt k = 0; k < num_elem; k++) { // Element