From a2c7029a8de5fa25e893f7e40be1ba17be3dbf61 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 11 Nov 2021 07:49:16 -0700 Subject: [PATCH] sve - initial SVE backend framework --- Makefile | 17 ++- backends/ceed-backend-list.h | 2 + backends/sve/ceed-sve-blocked.c | 56 +++++++++ backends/sve/ceed-sve-serial.c | 56 +++++++++ backends/sve/ceed-sve-tensor-f64.c | 176 +++++++++++++++++++++++++++++ backends/sve/ceed-sve.h | 28 +++++ 6 files changed, 333 insertions(+), 2 deletions(-) create mode 100644 backends/sve/ceed-sve-blocked.c create mode 100644 backends/sve/ceed-sve-serial.c create mode 100644 backends/sve/ceed-sve-tensor-f64.c create mode 100644 backends/sve/ceed-sve.h diff --git a/Makefile b/Makefile index 52c6ceecae..181a203db6 100644 --- a/Makefile +++ b/Makefile @@ -223,6 +223,7 @@ blocked.c := $(sort $(wildcard backends/blocked/*.c)) ceedmemcheck.c := $(sort $(wildcard backends/memcheck/*.c)) opt.c := $(sort $(wildcard backends/opt/*.c)) avx.c := $(sort $(wildcard backends/avx/*.c)) +sve.c := $(sort $(wildcard backends/sve/*.c)) xsmm.c := $(sort $(wildcard backends/xsmm/*.c)) cuda.c := $(sort $(wildcard backends/cuda/*.c)) cuda.cpp := $(sort $(wildcard backends/cuda/*.cpp)) @@ -283,6 +284,7 @@ info: $(info ------------------------------------) $(info MEMCHK_STATUS = $(MEMCHK_STATUS)$(call backend_status,$(MEMCHK_BACKENDS))) $(info AVX_STATUS = $(AVX_STATUS)$(call backend_status,$(AVX_BACKENDS))) + $(info SVE_STATUS = $(SVE_STATUS)$(call backend_status,$(SVE_BACKENDS))) $(info XSMM_DIR = $(XSMM_DIR)$(call backend_status,$(XSMM_BACKENDS))) $(info OCCA_DIR = $(OCCA_DIR)$(call backend_status,$(OCCA_BACKENDS))) $(info MAGMA_DIR = $(MAGMA_DIR)$(call backend_status,$(MAGMA_BACKENDS))) @@ -323,7 +325,7 @@ ifeq ($(MEMCHK),1) BACKENDS_MAKE += $(MEMCHK_BACKENDS) endif -# AVX Backed +# AVX Backends AVX_STATUS = Disabled AVX_FLAG := $(if $(filter clang,$(CC_VENDOR)),+avx,-mavx) AVX := $(filter $(AVX_FLAG),$(shell $(CC) $(OPT) -v -E -x c /dev/null 2>&1)) @@ -334,6 +336,17 @@ ifneq ($(AVX),) BACKENDS_MAKE += $(AVX_BACKENDS) endif +# SVE Backends +SVE_STATUS = Disabled +SVE_FLAG := $(if $(filter clang,$(CC_VENDOR)),+sve,-msve) +SVE := 1 +SVE_BACKENDS = /cpu/self/sve/serial /cpu/self/sve/blocked +ifneq ($(SVE),) + SVE_STATUS = Enabled + libceed.c += $(sve.c) + BACKENDS_MAKE += $(SVE_BACKENDS) +endif + # Collect list of libraries and paths for use in linking and pkg-config PKG_LIBS = @@ -410,7 +423,7 @@ ifneq ($(HIP_LIB_DIR),) BACKENDS_MAKE += $(HIP_BACKENDS) endif -# MAGMA Backend +# MAGMA Backends ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),) MAGMA_ARCH=$(shell nm -g $(MAGMA_DIR)/lib/libmagma.* | grep -c "hipblas") ifeq ($(MAGMA_ARCH), 0) #CUDA MAGMA diff --git a/backends/ceed-backend-list.h b/backends/ceed-backend-list.h index 1a6fef4984..c73c4641b4 100644 --- a/backends/ceed-backend-list.h +++ b/backends/ceed-backend-list.h @@ -22,6 +22,8 @@ MACRO(CeedRegister_Opt_Blocked, 1, "/cpu/self/opt/blocked") MACRO(CeedRegister_Opt_Serial, 1, "/cpu/self/opt/serial") MACRO(CeedRegister_Ref, 1, "/cpu/self/ref/serial") MACRO(CeedRegister_Ref_Blocked, 1, "/cpu/self/ref/blocked") +MACRO(CeedRegister_Sve_Serial, 1, "/cpu/self/sve/serial") +MACRO(CeedRegister_Sve_Blocked, 1, "/cpu/self/sve/blocked") MACRO(CeedRegister_Tmpl, 1, "/cpu/self/tmpl") MACRO(CeedRegister_Tmpl_Sub, 1, "/cpu/self/tmpl/sub") MACRO(CeedRegister_Xsmm_Blocked, 1, "/cpu/self/xsmm/blocked") diff --git a/backends/sve/ceed-sve-blocked.c b/backends/sve/ceed-sve-blocked.c new file mode 100644 index 0000000000..bfdce31b5b --- /dev/null +++ b/backends/sve/ceed-sve-blocked.c @@ -0,0 +1,56 @@ +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. +// All Rights reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include +#include +#include +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Backend Init +//------------------------------------------------------------------------------ +static int CeedInit_Sve(const char *resource, Ceed ceed) { + int ierr; + if (strcmp(resource, "/cpu/self") && strcmp(resource, "/cpu/self/sve") && + strcmp(resource, "/cpu/self/sve/blocked")) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "SVE backend cannot use resource: %s", resource); + // LCOV_EXCL_STOP + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); + + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceed_ref; + CeedInit("/cpu/self/opt/blocked", &ceed_ref); + ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr); + +// TODO: Make f64 and f32 versions + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_f64_Sve); + CeedChkBackend(ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Backend Register +//------------------------------------------------------------------------------ +CEED_INTERN int CeedRegister_Sve_Blocked(void) { + return CeedRegister("/cpu/self/sve/blocked", CeedInit_Sve, 30); +} +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-serial.c b/backends/sve/ceed-sve-serial.c new file mode 100644 index 0000000000..26f27bba30 --- /dev/null +++ b/backends/sve/ceed-sve-serial.c @@ -0,0 +1,56 @@ +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. +// All Rights reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include +#include +#include +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Backend Init +//------------------------------------------------------------------------------ +static int CeedInit_Sve(const char *resource, Ceed ceed) { + int ierr; + if (strcmp(resource, "/cpu/self") + && strcmp(resource, "/cpu/self/sve/serial")) + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, + "SVE backend cannot use resource: %s", resource); + // LCOV_EXCL_STOP + ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr); + + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceed_ref; + CeedInit("/cpu/self/opt/serial", &ceed_ref); + ierr = CeedSetDelegate(ceed, ceed_ref); CeedChkBackend(ierr); + +// TODO: Make f64 and f32 versions + ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", + CeedTensorContractCreate_f64_Sve); + CeedChkBackend(ierr); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Backend Register +//------------------------------------------------------------------------------ +CEED_INTERN int CeedRegister_Sve_Serial(void) { + return CeedRegister("/cpu/self/sve/serial", CeedInit_Sve, 35); +} +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-tensor-f64.c b/backends/sve/ceed-sve-tensor-f64.c new file mode 100644 index 0000000000..5ba51538ff --- /dev/null +++ b/backends/sve/ceed-sve-tensor-f64.c @@ -0,0 +1,176 @@ +// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. +// All Rights reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include +#include +#ifdef __ARM_FEATURE_SVE +#include +#endif +#include +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract, + CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, + double *restrict v, const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; t_stride_1 = J; + } + + for (CeedInt a=0; a +#include + +CEED_INTERN int CeedTensorContractCreate_f32_Sve(CeedBasis basis, + CeedTensorContract contract); +CEED_INTERN int CeedTensorContractCreate_f64_Sve(CeedBasis basis, + CeedTensorContract contract); + +#endif // _ceed_sve_h