From f5d9892025c09b90d5e8b0402a2f5645d72ecad0 Mon Sep 17 00:00:00 2001 From: dkokron Date: Fri, 29 Mar 2024 11:32:47 -0500 Subject: [PATCH] feat: add support for collective parallel reads in fms2_io (#1477) --- fms2_io/include/netcdf_read_data.inc | 104 +++++++++++++++++++-------- fms2_io/netcdf_io.F90 | 82 +++++++++++++++------ mpp/mpp.F90 | 11 ++- 3 files changed, 145 insertions(+), 52 deletions(-) diff --git a/fms2_io/include/netcdf_read_data.inc b/fms2_io/include/netcdf_read_data.inc index 4bfd427970..b69046cc64 100644 --- a/fms2_io/include/netcdf_read_data.inc +++ b/fms2_io/include/netcdf_read_data.inc @@ -354,8 +354,12 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & endif c(unlim_dim_index) = unlim_dim_level endif - if (fileobj%is_root) then + if(fileobj%use_collective) then varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + ! NetCDF does not have the ability to specify collective I/O at + ! the file basis so we must activate at the variable level + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + call check_netcdf_code(err, append_error_msg) select type(buf) type is (integer(kind=i4_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) @@ -370,20 +374,38 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & end select call check_netcdf_code(err, append_error_msg) call unpack_data_2d(fileobj, varid, variable_name, buf) - endif - if (bcast) then - select type(buf) - type is (integer(kind=i4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (integer(kind=i8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - class default - call error("Unsupported variable type: "//trim(append_error_msg)) - end select + else + if (fileobj%is_root) then + varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + select type(buf) + type is (integer(kind=i4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (integer(kind=i8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + call check_netcdf_code(err, append_error_msg) + call unpack_data_2d(fileobj, varid, variable_name, buf) + endif + if (bcast) then + select type(buf) + type is (integer(kind=i4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (integer(kind=i8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + endif endif end subroutine netcdf_read_data_2d @@ -446,8 +468,12 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & endif c(unlim_dim_index) = unlim_dim_level endif - if (fileobj%is_root) then + if(fileobj%use_collective) then varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + ! NetCDF does not have the ability to specify collective I/O at + ! the file basis so we must activate at the variable level + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + call check_netcdf_code(err, append_error_msg) select type(buf) type is (integer(kind=i4_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) @@ -462,20 +488,38 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & end select call check_netcdf_code(err, append_error_msg) call unpack_data_3d(fileobj, varid, variable_name, buf) - endif - if (bcast) then - select type(buf) - type is (integer(kind=i4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (integer(kind=i8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - class default - call error("Unsupported variable type: "//trim(append_error_msg)) - end select + else + if (fileobj%is_root) then + varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + select type(buf) + type is (integer(kind=i4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (integer(kind=i8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + call check_netcdf_code(err, append_error_msg) + call unpack_data_3d(fileobj, varid, variable_name, buf) + endif + if (bcast) then + select type(buf) + type is (integer(kind=i4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (integer(kind=i8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + endif endif end subroutine netcdf_read_data_3d diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index b66c6f0526..07959401cc 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -149,6 +149,11 @@ module netcdf_io_mod character (len=20) :: time_name type(dimension_information) :: bc_dimensions ! MPP_COMM_NULL acts as an analagous mpp-macro for MPI_COMM_NULL to share with fms2_io NetCDF4 +!! mpi-io. The default value for the no-mpi case comes from Intel MPI and MPICH. OpenMPI sets +!! a default value of '2' +#if defined(use_libMPI) + integer, parameter :: MPP_COMM_NULL = MPI_COMM_NULL +#else + integer, parameter :: MPP_COMM_NULL = 67108864 +#endif + !*********************************************************************** ! variables needed for subroutine read_input_nml (include/mpp_util.inc) !