diff --git a/configs/common/packages.yaml b/configs/common/packages.yaml index 72e0356a8..9bae2c5c1 100644 --- a/configs/common/packages.yaml +++ b/configs/common/packages.yaml @@ -76,6 +76,10 @@ packages: # message: "Extra ESMF compile options for GCC" prefer: - '+python' + # To avoid duplicate packages (concretizer bug?) + expat: + require: + - '~libbsd' fckit: require: '@0.13.2 +eckit' fftw: diff --git a/configs/sites/tier1/atlantis/compilers.yaml b/configs/sites/tier1/atlantis/compilers.yaml index c7c48def4..c2250ac45 100644 --- a/configs/sites/tier1/atlantis/compilers.yaml +++ b/configs/sites/tier1/atlantis/compilers.yaml @@ -16,7 +16,7 @@ compilers: prepend_path: PATH: /cm/local/apps/gcc/11.2.0/bin CPATH: /cm/local/apps/gcc/11.2.0/include - LD_LIBRARY_PATH: /cm/shared/apps/slurm/21.08.8/lib64:/cm/shared/apps/intel/oneapi/compiler/2022.1.0/linux/compiler/lib/intel64_lin + LD_LIBRARY_PATH: /cm/shared/apps/intel/oneapi/compiler/2022.1.0/linux/compiler/lib/intel64_lin:/cm/local/apps/gcc/11.2.0/lib64:/cm/local/apps/gcc/11.2.0/lib extra_rpaths: [] - compiler: spec: gcc@11.2.0 @@ -44,15 +44,14 @@ compilers: target: x86_64 modules: - intel-oneapi-2024.2.1 + - tbb/2021.13 - compiler-rt/2024.2.1 - compiler/2024.2.1 environment: prepend_path: PATH: /cm/local/apps/gcc/11.2.0/bin CPATH: /cm/local/apps/gcc/11.2.0/include - LD_LIBRARY_PATH: /cm/shared/apps/slurm/21.08.8/lib64:/cm/shared/apps/slurm/21.08.8/lib64/slurm:/cm/shared/apps/intel/oneapi-2024.2.1/compiler/2024 - append_path: - CPATH: /cm/shared/apps/intel/oneapi-2024.2.1/compiler/2024.2/opt/compiler/include/intel64/ + LD_LIBRARY_PATH: /cm/local/apps/gcc/11.2.0/lib64:/cm/local/apps/gcc/11.2.0/lib extra_rpaths: [] #- compiler: # spec: oneapi@2025.0.3 @@ -73,7 +72,5 @@ compilers: # prepend_path: # PATH: /cm/local/apps/gcc/11.2.0/bin # CPATH: /cm/local/apps/gcc/11.2.0/include -# LD_LIBRARY_PATH: 
/cm/shared/apps/slurm/21.08.8/lib64:/cm/shared/apps/slurm/21.08.8/lib64/slurm:/gpfs/neptune/spack-stack/oneapi-2025.0.3/compiler/2025.0 -# append_path: -# CPATH: /gpfs/neptune/spack-stack/oneapi-2025.0.3/compiler/2025.0/opt/compiler/include/intel64 +# LD_LIBRARY_PATH: /cm/local/apps/gcc/11.2.0/lib64:/cm/local/apps/gcc/11.2.0/lib # extra_rpaths: [] diff --git a/configs/sites/tier1/atlantis/packages_gcc.yaml b/configs/sites/tier1/atlantis/packages_gcc.yaml index bc10f8b75..0d85ceef2 100644 --- a/configs/sites/tier1/atlantis/packages_gcc.yaml +++ b/configs/sites/tier1/atlantis/packages_gcc.yaml @@ -11,3 +11,4 @@ packages: - spec: openmpi@4.1.5%gcc@=11.2.0~cuda~cxx~cxx_exceptions~java~memchecker+pmi~static~wrapper-rpath fabrics=ucx schedulers=slurm modules: - openmpi/mlnx/gcc/64/4.1.5a1 + - slurm diff --git a/configs/sites/tier1/nautilus/compilers.yaml b/configs/sites/tier1/nautilus/compilers.yaml index 147614a13..05a504822 100644 --- a/configs/sites/tier1/nautilus/compilers.yaml +++ b/configs/sites/tier1/nautilus/compilers.yaml @@ -84,22 +84,6 @@ compilers: # append_path: # CPATH: '/p/app/projects/NEPTUNE/spack-stack/oneapi-2025.0.0/compiler/2025.0/opt/compiler/include/intel64' # extra_rpaths: [] -- compiler: - spec: gcc@12.2.1 - paths: - cc: /opt/rh/gcc-toolset-12/root/usr/bin/gcc - cxx: /opt/rh/gcc-toolset-12/root/usr/bin/g++ - f77: /opt/rh/gcc-toolset-12/root/usr/bin/gfortran - fc: /opt/rh/gcc-toolset-12/root/usr/bin/gfortran - flags: {} - operating_system: rhel8 - target: x86_64 - modules: - - slurm - - scl/gcc-toolset-12 - environment: {} - extra_rpaths: [] -# Required for Intel backend - compiler: spec: gcc@11.2.1 paths: diff --git a/configs/sites/tier1/nautilus/packages_gcc.yaml b/configs/sites/tier1/nautilus/packages_gcc.yaml index 2e47c9e0e..1f6fd7a54 100644 --- a/configs/sites/tier1/nautilus/packages_gcc.yaml +++ b/configs/sites/tier1/nautilus/packages_gcc.yaml @@ -1,34 +1,14 @@ packages: -# On Nautilus, use intel-oneapi-mkl as provider -# for blas, lapack, 
fftw-api with GNU because of -# problems using openblas in downstream applications -# (e.g. py-pandas): -# ELF load command address/offset not properly aligned" when loading libopenblas.so -# https://github.com/OpenMathLib/OpenBLAS/wiki/Faq#ELFoffset all: - compiler:: [gcc@12.2.1] + compiler:: [gcc@11.2.1] providers: mpi:: [openmpi@5.0.1] - blas:: [intel-oneapi-mkl] - fftw-api:: [intel-oneapi-mkl] - lapack:: [intel-oneapi-mkl] mpi: buildable: False openmpi: externals: - - spec: openmpi@5.0.1%gcc@12.2.1~cuda~java~memchecker~static~wrapper-rpath fabrics=ucx schedulers=slurm + - spec: openmpi@5.0.1%gcc@11.2.1~cuda~java~memchecker~static~wrapper-rpath fabrics=ucx schedulers=slurm prefix: /p/app/penguin/openmpi/5.0.1/gcc-8.5.0 modules: - penguin/openmpi/5.0.1/gcc-8.5.0 - slurm - openblas: - buildable: False - ectrans: - require:: - - '@1.2.0 +mkl ~fftw' - gsibec: - require:: - - '@1.2.1 +mkl' - py-numpy: - require:: - - '^intel-oneapi-mkl' diff --git a/doc/source/PreConfiguredSites.rst b/doc/source/PreConfiguredSites.rst index 094fdd604..32257ea1e 100644 --- a/doc/source/PreConfiguredSites.rst +++ b/doc/source/PreConfiguredSites.rst @@ -223,6 +223,14 @@ The following is required for building new spack environments with GNU on this p module unload cray-libsci module load cray-libsci/23.05.1.4 +.. warning:: + After the successful build of a spack-stack environment, a utility script ``util/narwhal/fix_libsci.sh`` must be run to replace references to an old version of ``libsci`` in several shared libraries. See https://github.com/JCSDA/spack-stack/pull/1449 and https://github.com/JCSDA/spack-stack/issues/1447 for more information. + +.. code-block:: console + + # After running 'spack install' (or after 'spack stack setup-meta-modules') + ./util/narwhal/fix_libsci.sh 2>&1 | tee log.ENV_NAME_HERE.fix_libsci.001 + .. 
_Preconfigured_Sites_Nautilus:
diff --git a/util/narwhal/fix_libsci.sh b/util/narwhal/fix_libsci.sh
new file mode 100755
index 000000000..5c96f28ca
--- /dev/null
+++ b/util/narwhal/fix_libsci.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+
+# Utility script to work around issues on Narwhal, where an old cray-libsci
+# implementation is found and used by spack packages that use autotools,
+# libtool, and configure. This is because of symbolic links in /opt/cray/pe/lib64
+# pointing to the old (v21) libsci installation, and /etc/ld.so.conf.d/ containing
+# a file that tells the build system to include libraries from /opt/cray/pe/lib64.
+
+# The approach is to scan all shared libraries in a spack-stack environment and
+# use patchelf to replace the "bad" libsci references in those with references
+# to the "good" libsci version (determined by looking at the module that is loaded).
+
+# Print the error message given as $1 and abort the script. The exit has to
+# happen in the current shell: an 'exit' inside a '( ... )' subshell only
+# terminates the subshell and the script would carry on regardless.
+die() {
+  echo "$1"
+  exit 1
+}
+
+# Check input requirements
+echo
+echo "Checking for patchelf ..."
+command -v patchelf || die "ERROR, patchelf not found!"
+
+echo
+echo "Checking for active spack environment ..."
+[[ -n "${SPACK_ENV}" ]] || die "ERROR, not in an active spack environment!"
+echo "${SPACK_ENV}"
+
+echo
+echo "Checking for libsci in user environment ..."
+[[ -n "${CRAY_LIBSCI_PREFIX}" ]] || die "ERROR, CRAY_LIBSCI_PREFIX not defined!"
+echo "${CRAY_LIBSCI_PREFIX}"
+
+BAD_LIBSCI_PATH="/opt/cray/pe/lib64"
+GOOD_LIBSCI_PATH="${CRAY_LIBSCI_PREFIX}/lib"
+LIBSCI_PREFIX="libsci"
+
+# Print the lines of the ldd output for the library given as $1 that refer to
+# the "bad" libsci. The exit status is zero only if at least one line matches.
+bad_libsci_links() {
+  ldd "$1" | grep -F "${BAD_LIBSCI_PATH}/${LIBSCI_PREFIX}"
+}
+
+echo
+# Read one path per line on file descriptor 3, so that paths containing
+# whitespace stay intact and commands in the loop body cannot consume the list.
+while IFS= read -r -u 3 shlib; do
+  # Skip backups taken by this program
+  if [[ "${shlib}" == *"backup.libsci-original"* ]]; then
+    echo "Skipping ${shlib} ..."
+    continue
+  fi
+  # Check shared library
+  echo "Checking ${shlib} ..."
+  # Get all offending references to the "bad" libsci (one array element per line)
+  mapfile -t results < <(bad_libsci_links "${shlib}")
+  if [[ ${#results[@]} -eq 0 ]]; then
+    continue
+  fi
+  echo "Fixing ${shlib} ..."
+  # Loop over lines containing references to "bad" libsci
+  for line in "${results[@]}"; do
+    # Split each line into items by whitespace; with the default IFS this
+    # also drops any leading and trailing whitespace of the ldd output line
+    read -r -a items <<< "${line}"
+    # Sanity check 1. The item with index 1 must be '=>'
+    if [[ "${items[1]}" != "=>" ]]; then
+      die "ERROR parsing ldd output for file ${shlib}: '${line}'"
+    fi
+    # Sanity check 2. The name of the library (item[0]) must match the
+    # name of the library it is linked to (filename from target path item[2])
+    libname=${items[0]}
+    if [[ "${libname}" != "${items[2]##*/}" ]]; then
+      die "ERROR matching library name in ldd output for file ${shlib}: '${line}'"
+    fi
+    replacement=${GOOD_LIBSCI_PATH}/${libname}
+    # Sanity check 3. The library must exist in the "good" libsci path.
+    if [[ ! -e "${replacement}" ]]; then
+      die "ERROR, replacement ${replacement} for ${items[2]} does not exist!"
+    fi
+    # Create a backup of the original shared library and replace the old
+    # library (referenced as ${libname}) with the new one (${replacement})
+    if [[ ! -f "${shlib}.backup.libsci-original" ]]; then
+      echo "Creating backup ${shlib}.backup.libsci-original"
+      cp -a "${shlib}" "${shlib}.backup.libsci-original"
+    fi
+    # Patch shared library. Replace reference (rpath/direct link) with direct link
+    echo "Executing 'patchelf --replace-needed ${libname} ${replacement} ${shlib}'"
+    patchelf --replace-needed "${libname}" "${replacement}" "${shlib}" \
+      || die "ERROR executing 'patchelf --replace-needed ${libname} ${replacement} ${shlib}'"
+  done
+  # Re-check and error out if the library still links to the "bad" libsci
+  if bad_libsci_links "${shlib}" > /dev/null; then
+    echo "ERROR, when re-checking ${shlib}, still found bad libsci links!"
+    bad_libsci_links "${shlib}"
+    exit 1
+  fi
+done 3< <(lfs find "${SPACK_ENV}/install" -type f -name 'lib*.so*')