From c45b0cdc4b7f106e1294eb4a7359ff35ad9c300e Mon Sep 17 00:00:00 2001 From: kvrigor Date: Wed, 19 Nov 2025 13:17:01 +0100 Subject: [PATCH 1/9] Env files for NVHPC toolchain --- env/jsc.2025.nvhpc.openmpi | 57 ++++++++++++++++++++++++++++++++++++++ env/jsc.2025.nvhpc.psmpi | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 env/jsc.2025.nvhpc.openmpi create mode 100644 env/jsc.2025.nvhpc.psmpi diff --git a/env/jsc.2025.nvhpc.openmpi b/env/jsc.2025.nvhpc.openmpi new file mode 100644 index 0000000..5b3be6f --- /dev/null +++ b/env/jsc.2025.nvhpc.openmpi @@ -0,0 +1,57 @@ +# -------------------------------------------------------------------------- +# Loads NVHPC+OpenMPI build environment for TSMP2. +# This environment is tailored for JURECA [1] and JUWELS [2] supercomputers. +# +# [1] https://apps.fz-juelich.de/jsc/software/jureca/index.xhtml +# [2] https://apps.fz-juelich.de/jsc/software/juwels/index.xhtml +# +# Usage: source jsc.2025.nvhpc.openmpi +# -------------------------------------------------------------------------- + +# Load Stages/2025 +module --force purge +module use $OTHERSTAGES +module load Stages/2025 +module load NVHPC/25.5-CUDA-12 +module load OpenMPI + +# Basic scripting and build tools +module load Python +module load CMake +module load git + +# Storage libraries +module load HDF5 +module load netCDF +module load netCDF-Fortran +module load PnetCDF + +# ParFlow additional libraries (TODO) +# module load Hypre # Non-existent under NVHPC toolchain +# module load Tcl +if [[ "$1" == "--parflowgpu" ]]; then + echo "ERROR: Hypre isn't yet available on the NVHPC toolchain." 
+fi + +# ICON additional libraries +module load ecCodes + +# Set default MPI compilers +export OMPI_CC=nvc +export OMPI_CXX=nvc++ +export OMPI_FC=nvfortran +export CC=mpicc +export FC=mpif90 +export CXX=mpicxx +export MPI_HOME=$EBROOTOPENMPI + +# Display compiler settings +module list +echo "=============== COMPILER SETTINGS ===============" +echo " Machine: ${SYSTEMNAME} on Stages/$STAGE" +echo " MPI lib: $(mpirun --version | head -n 1)" +echo " C: $($CC --version | head -n 2 | tail -n 1)" +echo " C++: $($CXX --version | head -n 2 | tail -n 1)" +echo " Fortran: $($FC --version | head -n 2 | tail -n 1)" +echo "==================================================" +echo "" diff --git a/env/jsc.2025.nvhpc.psmpi b/env/jsc.2025.nvhpc.psmpi new file mode 100644 index 0000000..7792c32 --- /dev/null +++ b/env/jsc.2025.nvhpc.psmpi @@ -0,0 +1,54 @@ +# -------------------------------------------------------------------------- +# Loads NVHPC+ParaStationMPI build environment for TSMP2. +# This environment is tailored for JURECA [1] and JUWELS [2] supercomputers. +# +# [1] https://apps.fz-juelich.de/jsc/software/jureca/index.xhtml +# [2] https://apps.fz-juelich.de/jsc/software/juwels/index.xhtml +# +# Usage: source jsc.2025.nvhpc.psmpi +# -------------------------------------------------------------------------- + +# Load Stages/2025 +module --force purge +module use $OTHERSTAGES +module load Stages/2025 +module load NVHPC/25.5-CUDA-12 +module load ParaStationMPI + +# Basic scripting and build tools +module load Python +module load CMake +module load git + +# Storage libraries +module load HDF5 +module load netCDF +module load netCDF-Fortran +module load PnetCDF + +# ParFlow additional libraries (TODO) +# module load Hypre # Non-existent under NVHPC toolchain +# module load Tcl +if [[ "$1" == "--parflowgpu" ]]; then + echo "ERROR: Hypre isn't yet available on the NVHPC toolchain." 
+fi + +# ICON additional libraries +module load ecCodes + +# Set default MPI compilers +export CC=mpicc +export FC=mpif90 +export CXX=mpicxx +export MPI_HOME=$EBROOTPSMPI + +# Display compiler settings +module list +echo "====================================== COMPILER SETTINGS ======================================" +echo " Machine: ${SYSTEMNAME} on Stages/$STAGE" +echo " MPI lib: $(mpichversion | head -n 1 | tr -d =)" +echo " C: $($CC --version 2>/dev/null | head -n 2 | tail -n 1)" +echo " C++: $($CXX --version 2>/dev/null | head -n 2 | tail -n 1)" +echo " Fortran: $($FC --version 2>/dev/null | head -n 2 | tail -n 1)" +echo "===============================================================================================" +echo "" From 8dbeaf66f1d052dbf9ca67c53f90c1a41c1d4f52 Mon Sep 17 00:00:00 2001 From: kvrigor Date: Fri, 28 Nov 2025 09:37:00 +0100 Subject: [PATCH 2/9] NVHPC Hypre is now available --- env/jsc.2025.nvhpc.openmpi | 23 +++++++++++++++-------- env/jsc.2025.nvhpc.psmpi | 18 ++++++++++++------ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/env/jsc.2025.nvhpc.openmpi b/env/jsc.2025.nvhpc.openmpi index 5b3be6f..6241d61 100644 --- a/env/jsc.2025.nvhpc.openmpi +++ b/env/jsc.2025.nvhpc.openmpi @@ -26,12 +26,19 @@ module load netCDF module load netCDF-Fortran module load PnetCDF -# ParFlow additional libraries (TODO) -# module load Hypre # Non-existent under NVHPC toolchain -# module load Tcl -if [[ "$1" == "--parflowgpu" ]]; then - echo "ERROR: Hypre isn't yet available on the NVHPC toolchain." 
+# ParFlow additional libraries +module load CUDA +module load UCX-settings/RC-CUDA +module load Hypre/2.31.0 + +# TODO: Verify these values +if [[ $SYSTEMNAME == "jedi" || $SYSTEMNAME == "jupiter" ]]; then + export CUDAARCHS="90" +else + export CUDAARCHS="80" fi +export CMAKE_CUDA_RUNTIME_LIBRARY="Shared" + # ICON additional libraries module load ecCodes @@ -47,11 +54,11 @@ export MPI_HOME=$EBROOTOPENMPI # Display compiler settings module list -echo "=============== COMPILER SETTINGS ===============" +echo "=========================== COMPILER SETTINGS =======================" echo " Machine: ${SYSTEMNAME} on Stages/$STAGE" echo " MPI lib: $(mpirun --version | head -n 1)" echo " C: $($CC --version | head -n 2 | tail -n 1)" echo " C++: $($CXX --version | head -n 2 | tail -n 1)" echo " Fortran: $($FC --version | head -n 2 | tail -n 1)" -echo "==================================================" -echo "" +echo "======================================================================" + diff --git a/env/jsc.2025.nvhpc.psmpi b/env/jsc.2025.nvhpc.psmpi index 7792c32..fb52e49 100644 --- a/env/jsc.2025.nvhpc.psmpi +++ b/env/jsc.2025.nvhpc.psmpi @@ -27,11 +27,17 @@ module load netCDF-Fortran module load PnetCDF # ParFlow additional libraries (TODO) -# module load Hypre # Non-existent under NVHPC toolchain -# module load Tcl -if [[ "$1" == "--parflowgpu" ]]; then - echo "ERROR: Hypre isn't yet available on the NVHPC toolchain." 
+module load CUDA +module load UCX-settings/RC-CUDA +module load Hypre/2.31.0 + +# TODO: Verify these values +if [[ $SYSTEMNAME == "jedi" || $SYSTEMNAME == "jupiter" ]]; then + export CUDAARCHS="90" +else + export CUDAARCHS="80" fi +export CMAKE_CUDA_RUNTIME_LIBRARY="Shared" # ICON additional libraries module load ecCodes @@ -44,11 +50,11 @@ export MPI_HOME=$EBROOTPSMPI # Display compiler settings module list -echo "====================================== COMPILER SETTINGS ======================================" +echo "========================= COMPILER SETTINGS ==========================" echo " Machine: ${SYSTEMNAME} on Stages/$STAGE" echo " MPI lib: $(mpichversion | head -n 1 | tr -d =)" echo " C: $($CC --version 2>/dev/null | head -n 2 | tail -n 1)" echo " C++: $($CXX --version 2>/dev/null | head -n 2 | tail -n 1)" echo " Fortran: $($FC --version 2>/dev/null | head -n 2 | tail -n 1)" -echo "===============================================================================================" +echo "======================================================================" echo "" From fc9dbd93a104c2baef1c574c46cc8f20071d797a Mon Sep 17 00:00:00 2001 From: kvrigor Date: Mon, 1 Dec 2025 09:19:52 +0100 Subject: [PATCH 3/9] Added LAPACK module to intel.psmpi --- env/jsc.2025.intel.psmpi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/env/jsc.2025.intel.psmpi b/env/jsc.2025.intel.psmpi index fd52f7f..16d8493 100644 --- a/env/jsc.2025.intel.psmpi +++ b/env/jsc.2025.intel.psmpi @@ -22,6 +22,8 @@ else module load Intel fi module load ParaStationMPI +module load ScaLAPACK/2.2.0-fb + # Basic scripting and build tools module load Python From a1fa83e18c0a7516aabc37315cfce89273789cb9 Mon Sep 17 00:00:00 2001 From: kvrigor Date: Mon, 1 Dec 2025 10:25:19 +0100 Subject: [PATCH 4/9] Supported NVHPC on ParFlow --- cmake/BuildParFlow.cmake | 8 ++++++-- env/jsc.2025.nvhpc.openmpi | 1 + env/jsc.2025.nvhpc.psmpi | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git 
a/cmake/BuildParFlow.cmake b/cmake/BuildParFlow.cmake index 708e14a..3071e33 100644 --- a/cmake/BuildParFlow.cmake +++ b/cmake/BuildParFlow.cmake @@ -35,7 +35,6 @@ else() else() set(PF_ACC_BACKEND "none") endif() - #TODO: Add support for 'kokkos' backend endif() # Set compiler flags @@ -48,7 +47,12 @@ if(CMAKE_C_COMPILER_ID STREQUAL "GNU") endif() set(PF_FFLAGS "-ffree-line-length-none -ffixed-line-length-none") elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") - set(PF_CFLAGS "-Wall -Werror -Wno-unused-function -Wno-unused-variable") +elseif(CMAKE_C_COMPILER_ID STREQUAL "NVHPC") + if (NOT ParFlowGPU) + # NVHPC is only supported for GPU builds; ParFlowGPU is tested by name so an unset + # value counts as OFF. TODO: CPU-only NVHPC needs different libraries+environment. + message(FATAL_ERROR "NVHPC is only valid for ParflowGPU builds.") + endif() else() message(FATAL_ERROR "C compiler '${CMAKE_C_COMPILER_ID}' is not supported.") endif() diff --git a/env/jsc.2025.nvhpc.openmpi b/env/jsc.2025.nvhpc.openmpi index 6241d61..cc31ee9 100644 --- a/env/jsc.2025.nvhpc.openmpi +++ b/env/jsc.2025.nvhpc.openmpi @@ -14,6 +14,7 @@ module use $OTHERSTAGES module load Stages/2025 module load NVHPC/25.5-CUDA-12 module load OpenMPI +module load ScaLAPACK/2.2.0-fb # Basic scripting and build tools module load Python diff --git a/env/jsc.2025.nvhpc.psmpi b/env/jsc.2025.nvhpc.psmpi index fb52e49..36a204f 100644 --- a/env/jsc.2025.nvhpc.psmpi +++ b/env/jsc.2025.nvhpc.psmpi @@ -14,6 +14,7 @@ module use $OTHERSTAGES module load Stages/2025 module load NVHPC/25.5-CUDA-12 module load ParaStationMPI +module load ScaLAPACK/2.2.0-fb # Basic scripting and build tools module load Python From 41fc97ff12726ae51914d899e3ad7387124bd84c Mon Sep 17 00:00:00 2001 From: kvrigor Date: Mon, 1 Dec 2025 10:26:45 +0100 Subject: [PATCH 5/9] Switched eCLM to dev-nvhpc-support branch --- .gitmodules | 2 +- models/eCLM | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git
a/.gitmodules b/.gitmodules index 2a7a07c..c705552 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,7 +17,7 @@ [submodule "models/eCLM"] path = models/eCLM url = https://github.com/HPSCTerrSys/eCLM.git - branch = beta-0.4 + branch = dev-nvhpc-support [submodule "models/oasis3-mct"] path = models/oasis3-mct url = https://icg4geo.icg.kfa-juelich.de/ExternalReposPublic/oasis3-mct diff --git a/models/eCLM b/models/eCLM index 4d567d2..78bb748 160000 --- a/models/eCLM +++ b/models/eCLM @@ -1 +1 @@ -Subproject commit 4d567d2d68cac0fba977914b4a9c3ba199afd0ff +Subproject commit 78bb7482d8ee6ba0e819ec3f0f0d91886a94caaf From 058e95651b12c577c2f2a86f7bc2c83f92c01653 Mon Sep 17 00:00:00 2001 From: kvrigor Date: Mon, 1 Dec 2025 14:00:21 +0100 Subject: [PATCH 6/9] Supported nvfortran build on OASIS3-MCT library A notable issue is the lack of quad-precision (16 bytes) support in NVHPC ([1]). Luckily OASIS has a __NO_16BYTE_REALS compiler definition to work around this issue. [1]: https://forums.developer.nvidia.com/t/need-support-for-real-16-on-cpu-in-the-nvfortran-compiler/316805 --- cmake/BuildOASIS3MCT.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/BuildOASIS3MCT.cmake b/cmake/BuildOASIS3MCT.cmake index c66840f..2fcb736 100644 --- a/cmake/BuildOASIS3MCT.cmake +++ b/cmake/BuildOASIS3MCT.cmake @@ -44,6 +44,9 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") file(APPEND ${OASIS_MAKE_INC} "FCBASEFLAGS = ${OPTIM} -I. -ffree-line-length-none -fallow-argument-mismatch ${OpenMP_Fortran_FLAGS}\n") elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" OR CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM") file(APPEND ${OASIS_MAKE_INC} "FCBASEFLAGS = ${OPTIM} -I. -xCORE-AVX2 -assume byterecl -mt_mpi ${OpenMP_Fortran_FLAGS}\n") +elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") + # nvfortran doesn't support real(16) / quad precision; hence the __NO_16BYTE_REALS here. + file(APPEND ${OASIS_MAKE_INC} "FCBASEFLAGS = -D__NO_16BYTE_REALS ${OPTIM} -I. 
${OpenMP_Fortran_FLAGS}\n") else() message(FATAL_ERROR "Fortran compiler '${CMAKE_Fortran_COMPILER_ID}' is not supported.") endif() @@ -68,7 +71,7 @@ ExternalProject_Add(OASIS3_MCT SOURCE_DIR ${OASIS_SRC} BUILD_IN_SOURCE FALSE CONFIGURE_COMMAND "" - BUILD_COMMAND make -f ${OASIS_SRC}/util/make_dir/TopMakefileOasis3 static-libs -C ${OASIS_BLD_DIR} + BUILD_COMMAND make -f ${OASIS_SRC}/util/make_dir/TopMakefileOasis3 realclean static-libs -C ${OASIS_BLD_DIR} INSTALL_COMMAND "" ) From 6956195bd59d13bd8a69db71e2382672f53b657f Mon Sep 17 00:00:00 2001 From: kvrigor Date: Mon, 1 Dec 2025 14:39:42 +0100 Subject: [PATCH 7/9] Updated eCLM tip --- models/eCLM | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/eCLM b/models/eCLM index 78bb748..701d501 160000 --- a/models/eCLM +++ b/models/eCLM @@ -1 +1 @@ -Subproject commit 78bb7482d8ee6ba0e819ec3f0f0d91886a94caaf +Subproject commit 701d50170c6f23085c9d4bd68a8e752869d986c5 From 17928f5fdbc06658fe67ba08dc7a0337e4a097e2 Mon Sep 17 00:00:00 2001 From: kvrigor Date: Wed, 10 Dec 2025 11:44:24 +0100 Subject: [PATCH 8/9] Updated eCLM --- models/eCLM | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/eCLM b/models/eCLM index 701d501..548116b 160000 --- a/models/eCLM +++ b/models/eCLM @@ -1 +1 @@ -Subproject commit 701d50170c6f23085c9d4bd68a8e752869d986c5 +Subproject commit 548116ba71942636ffad8f6460b8d48f8b16670d From d338182174aa208b96282701289ffac79053762b Mon Sep 17 00:00:00 2001 From: Johannes Keller <16795031+jjokella@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:57:47 +0100 Subject: [PATCH 9/9] Support NVHPC toolchain for PDAF (#116) * BuildPDAF: Support NVHPC toolchain for PDAF * Also: Updating to newer version of PDAF (relies on HPSCTerrSys/pdaf#40) --- cmake/BuildPDAF.cmake | 44 +++++++++++++++++++++++++++++++++++++++++++ models/pdaf | 2 +- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/cmake/BuildPDAF.cmake b/cmake/BuildPDAF.cmake index 50dc58a..499f1d1 100644 
--- a/cmake/BuildPDAF.cmake +++ b/cmake/BuildPDAF.cmake @@ -24,6 +24,7 @@ endif() # LAPACK is required # For eCLM-PDAF, this setting has to be consistent with MKL/LAPACK # loading in `eCLM/src/clm5/CMakelists.txt` +# https://cmake.org/cmake/help/latest/module/FindLAPACK.html find_package(LAPACK REQUIRED) # OpenMP is required @@ -73,6 +74,12 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") list(APPEND PDAF_LINK_LIBS "-mkl") list(APPEND PDAF_LINK_LIBS "${LAPACK_LIBRARIES}") message(WARNING "LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") + # LAPACK libraries and linker flags reported by find_package(LAPACK) + list(APPEND PDAF_LINK_LIBS "${LAPACK_LIBRARIES}") + list(APPEND PDAF_LINK_LIBS "${LAPACK_LINKER_FLAGS}") + message(STATUS "LAPACK_LIBRARIES: ${LAPACK_LIBRARIES}") + message(STATUS "LAPACK_LINKER_FLAGS: ${LAPACK_LINKER_FLAGS}") else() message(FATAL_ERROR "Unsupported CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") endif() @@ -147,6 +154,22 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") list(APPEND PDAF_FOPT "-fallow-argument-mismatch") list(APPEND PDAF_FOPT "-fcommon") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") + + # Using NVHPC + if (CMAKE_BUILD_TYPE STREQUAL "RELEASE") + # Release optimization flags + list(APPEND PDAF_FOPT "-Ofast") + elseif (CMAKE_BUILD_TYPE STREQUAL "DEBUG") + # Debug optimization flags + list(APPEND PDAF_FOPT "-O0") + list(APPEND PDAF_FOPT "-g") + else() + message(FATAL_ERROR "Unsupported CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") + endif() + + list(APPEND PDAF_FOPT "-fPIC") + else() message(FATAL_ERROR "Unsupported CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") endif() @@ -195,6 +218,22 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") list(APPEND PDAF_COPT "-mcmodel=large") list(APPEND PDAF_COPT "-fcommon") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") + + # Using NVHPC + if (CMAKE_BUILD_TYPE STREQUAL "RELEASE") + # Release optimization flags + list(APPEND PDAF_COPT "-Ofast") + elseif (CMAKE_BUILD_TYPE STREQUAL "DEBUG") + # Debug
optimization flags + list(APPEND PDAF_COPT "-O0") + list(APPEND PDAF_COPT "-g") + else() + message(FATAL_ERROR "Unsupported CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") + endif() + + list(APPEND PDAF_COPT "-fPIC") + else() message(FATAL_ERROR "Unsupported CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") endif() @@ -213,6 +252,11 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") list(APPEND PDAF_DOUBLEPRECISION "-fdefault-real-8") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") + + # nvfortran: -r8 promotes default REAL to 8 bytes (double precision) + list(APPEND PDAF_DOUBLEPRECISION "-r8") + else() message(FATAL_ERROR "Unsupported CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") endif() diff --git a/models/pdaf b/models/pdaf index 8f8cc5c..3c9cef3 160000 --- a/models/pdaf +++ b/models/pdaf @@ -1 +1 @@ -Subproject commit 8f8cc5cb7502abdb4fd89f91807a177c9f0cb8b9 +Subproject commit 3c9cef36c088787f541f354ee39d63f4dac6e9dc