diff --git a/src/BlockOutput.h b/src/BlockOutput.h index 302a427a3..047bcb226 100644 --- a/src/BlockOutput.h +++ b/src/BlockOutput.h @@ -111,13 +111,13 @@ struct BlockAverages : OutputableBase { stepsPerOut = event.frequency; invSteps = 1.0 / stepsPerOut; firstInvSteps = invSteps; - //Handle the case where we are restarting from a checkpoint and the first - //interval is smaller than expected because we create a checkpoint more - //often than the Block output frequency. + // Handle the case where we are restarting from a checkpoint and the first + // interval is smaller than expected because we create a checkpoint more + // often than the Block output frequency. if (startStep != 0 && (startStep % stepsPerOut) != 0) { ulong diff; diff = stepsPerOut - (startStep % stepsPerOut); - firstInvSteps = 1.0/diff; + firstInvSteps = 1.0 / diff; } enableOut = event.enable; } diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp index 5e087604d..7cd429c54 100644 --- a/src/ConfigSetup.cpp +++ b/src/ConfigSetup.cpp @@ -1897,7 +1897,6 @@ void ConfigSetup::verifyInputs(void) { std::cout << "ERROR: Impulse Pressure Correction cannot be " << "used with LJ long-range corrections." << std::endl; exit(EXIT_FAILURE); - } if (((sys.ff.VDW_KIND == sys.ff.VDW_SHIFT_KIND) || (sys.ff.VDW_KIND == sys.ff.VDW_SWITCH_KIND)) && @@ -1905,7 +1904,6 @@ void ConfigSetup::verifyInputs(void) { std::cout << "ERROR: Impulse Pressure Correction is not supported " << "for SWITCH or SHIFT potentials." << std::endl; exit(EXIT_FAILURE); - } if (sys.ff.doImpulsePressureCorr && sys.ff.doTailCorr) { std::cout << "ERROR: Both LRC (Long Range Correction) and " @@ -2104,9 +2102,10 @@ void ConfigSetup::verifyInputs(void) { if (in.restart.restartFromBinaryCoorFile) { for (i = 0; i < BOX_TOTAL; i++) { if (!in.files.binaryCoorInput.defined[i]) { - std::cout << "ERROR: Binary coordinate file was not specified for box " - "number " - << i << "!" << std::endl; + std::cout + << "ERROR: Binary coordinate file was not specified for box " + "number " + << i << "!" << std::endl; exit(EXIT_FAILURE); } } @@ -2174,25 +2173,30 @@ void ConfigSetup::verifyInputs(void) { if ((sys.memcVal.MEMC1 && sys.memcVal.MEMC2) || (sys.memcVal.MEMC1 && sys.memcVal.MEMC3) || (sys.memcVal.MEMC2 && sys.memcVal.MEMC3)) { - std::cout << "ERROR: Multiple MEMC methods were specified, but only one is allowed!\n"; + std::cout << "ERROR: Multiple MEMC methods were specified, but only one " + "is allowed!\n"; exit(EXIT_FAILURE); } if ((sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC2) || (sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC3) || (sys.intraMemcVal.MEMC2 && sys.intraMemcVal.MEMC3)) { - std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only one is allowed!\n"; + std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only " + "one is allowed!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readVol || !sys.intraMemcVal.readVol) { - std::cout << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n"; + std::cout + << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readRatio || !sys.intraMemcVal.readRatio) { - std::cout << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n"; + std::cout + << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.largeKind.size() != sys.memcVal.exchangeRatio.size()) { - std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was " + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds was " << sys.memcVal.largeKind.size() << ", but " << sys.memcVal.exchangeRatio.size() << " exchange ratio was specified!\n"; @@ -2209,49 +2213,52 @@ void ConfigSetup::verifyInputs(void) { if ((sys.memcVal.largeKind.size() != sys.memcVal.smallKind.size()) || (sys.intraMemcVal.largeKind.size() != sys.intraMemcVal.smallKind.size())) { - std::cout - << "ERROR: In the MEMC method, the specified number of Large Kinds is not " - << " equal as specified number of Small Kinds!\n"; + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds is not " + << " equal as specified number of Small Kinds!\n"; exit(EXIT_FAILURE); } if (!sys.memcVal.readLargeBB || !sys.intraMemcVal.readLargeBB) { - std::cout - << "ERROR: In the MEMC method, Large Kind BackBone was not specified!\n"; + std::cout << "ERROR: In the MEMC method, Large Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.largeKind.size() != sys.memcVal.largeBBAtom1.size()) { - std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was " + std::cout << "ERROR: In the MEMC method, the specified number of Large " + "Kinds was " << sys.memcVal.largeKind.size() << ", but " << sys.memcVal.largeBBAtom1.size() << " sets of Large Molecule BackBone was specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.MEMC2 && !sys.memcVal.readSmallBB) { - std::cout - << "ERROR: In the MEMC-2 method, Small Kind BackBone was not specified!\n"; + std::cout << "ERROR: In the MEMC-2 method, Small Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.MEMC2 && (sys.memcVal.smallKind.size() != sys.memcVal.smallBBAtom1.size())) { - std::cout - << "ERROR: In the MEMC-2 method, the specified number of Small Kinds was " - << sys.memcVal.smallKind.size() << ", but " - << sys.memcVal.smallBBAtom1.size() - << " sets of Small Molecule BackBone was specified!\n"; + std::cout << "ERROR: In the MEMC-2 method, the specified number of Small " + "Kinds was " + << sys.memcVal.smallKind.size() << ", but " + << sys.memcVal.smallBBAtom1.size() + << " sets of Small Molecule BackBone was specified!\n"; exit(EXIT_FAILURE); } if (sys.intraMemcVal.MEMC2 && !sys.intraMemcVal.readSmallBB) { - std::cout << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not " - "specified!\n"; + std::cout + << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not " + "specified!\n"; exit(EXIT_FAILURE); } if (sys.memcVal.enable && sys.intraMemcVal.enable) { if ((sys.memcVal.MEMC1 && !sys.intraMemcVal.MEMC1) || (sys.memcVal.MEMC2 && !sys.intraMemcVal.MEMC2) || (sys.memcVal.MEMC3 && !sys.intraMemcVal.MEMC3)) { - std::cout << "ERROR: The selected intra-MEMC method was not same as the inter-MEMC method!\n"; + std::cout << "ERROR: The selected intra-MEMC method was not same as " + "the inter-MEMC method!\n"; exit(EXIT_FAILURE); } } diff --git a/src/GPU/CUDAMemoryManager.cuh b/src/GPU/CUDAMemoryManager.cuh index 0b5b52dcf..06e476a9d 100644 --- a/src/GPU/CUDAMemoryManager.cuh +++ b/src/GPU/CUDAMemoryManager.cuh @@ -2,28 +2,31 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA #include #include -#include #include +#include -#define CUMALLOC(address,size) CUDAMemoryManager::mallocMemory(address,size,#address) -#define CUFREE(address) CUDAMemoryManager::freeMemory(address,#address) +#define CUMALLOC(address, size) \ + CUDAMemoryManager::mallocMemory(address, size, #address) +#define CUFREE(address) CUDAMemoryManager::freeMemory(address, #address) -class CUDAMemoryManager -{ +class CUDAMemoryManager { public: - static cudaError_t mallocMemory(void **address, unsigned int size, std::string var_name); + static cudaError_t mallocMemory(void **address, unsigned int size, + std::string var_name); static cudaError_t freeMemory(void *address, std::string var_name); static bool isFreed(); private: static long long totalAllocatedBytes; - static std::unordered_map > allocatedPointers; + static std::unordered_map> + allocatedPointers; }; #endif diff --git a/src/GPU/CalculateEnergyCUDAKernel.cuh b/src/GPU/CalculateEnergyCUDAKernel.cuh index 9f1366400..b0367cc3c 100644 --- a/src/GPU/CalculateEnergyCUDAKernel.cuh +++ b/src/GPU/CalculateEnergyCUDAKernel.cuh @@ -2,93 +2,54 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA +#include "BoxDimensions.h" +#include "VariablesCUDA.cuh" +#include "XYZArray.h" #include #include #include -#include "XYZArray.h" -#include "BoxDimensions.h" -#include "VariablesCUDA.cuh" -void CallBoxInterGPU(VariablesCUDA *vars, - const std::vector &cellVector, +void CallBoxInterGPU(VariablesCUDA *vars, const std::vector &cellVector, const std::vector &cellStartIndex, - const std::vector > &neighborList, - XYZArray const &coords, - BoxDimensions const &boxAxes, + const std::vector> &neighborList, + XYZArray const &coords, BoxDimensions const &boxAxes, bool electrostatic, const std::vector &particleCharge, const std::vector &particleKind, - const std::vector &particleMol, - double &REn, - double &LJEn, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - uint const box); - -__global__ void BoxInterGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_REn, - double *gpu_LJEn, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - int *gpu_ewald, - double *gpu_diElectric_1, - int *gpu_nonOrth, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); + const std::vector &particleMol, double &REn, + double &LJEn, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, uint const box); +__global__ void +BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, double *gpu_x, double *gpu_y, double *gpu_z, + double3 axis, double3 halfAx, bool electrostatic, + double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, + double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, + double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex, + double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, + bool *gpu_isFraction, int box); -__device__ double CalcCoulombGPU(double distSq, int kind1, int kind2, - double qi_qj_fact, double gpu_rCutLow, - int gpu_ewald, int gpu_VDW_Kind, - double gpu_alpha, double gpu_rCutCoulomb, - int gpu_isMartini, double gpu_diElectric_1, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq, - int gpu_count); +__device__ double +CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact, + double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind, + double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini, + double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, + double *gpu_sigmaSq, int gpu_count); __device__ double CalcCoulombVirGPU(double distSq, double qi_qj, double gpu_rCutCoulomb, double gpu_alpha, int gpu_VDW_Kind, int gpu_ewald, @@ -102,78 +63,74 @@ __device__ double CalcEnGPU(double distSq, int kind1, int kind2, double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst); -//ElectroStatic Calculation +// ElectroStatic Calculation //**************************************************************// -__device__ double CalcCoulombParticleGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq); +__device__ double CalcCoulombParticleGPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, + double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombParticleGPUNoLambda(double distSq, - double qi_qj_fact, - int gpu_ewald, - double gpu_alpha); -__device__ double CalcCoulombShiftGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut, double gpu_lambdaCoulomb, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double *gpu_sigmaSq); + double qi_qj_fact, + int gpu_ewald, + double gpu_alpha); +__device__ double CalcCoulombShiftGPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, double gpu_rCut, + double gpu_lambdaCoulomb, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombShiftGPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut); -__device__ double CalcCoulombExp6GPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_lambdaCoulomb, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, double *gpu_sigmaSq); + int gpu_ewald, double gpu_alpha, + double gpu_rCut); +__device__ double CalcCoulombExp6GPU(double distSq, int index, + double qi_qj_fact, int gpu_ewald, + double gpu_alpha, double gpu_lambdaCoulomb, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, + double *gpu_sigmaSq); __device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha); -__device__ double CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, - double gpu_rCut, - double gpu_diElectric_1, - double gpu_lambdaCoulomb, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double *gpu_sigmaSq); -__device__ double CalcCoulombSwitchMartiniGPUNoLambda(double distSq, - double qi_qj_fact, - int gpu_ewald, - double gpu_alpha, - double gpu_rCut, - double gpu_diElectric_1); -__device__ double CalcCoulombSwitchGPU(double distSq, int index, double qi_qj_fact, - double gpu_alpha, int gpu_ewald, - double gpu_rCut, + int gpu_ewald, double gpu_alpha); +__device__ double +CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact, + int gpu_ewald, double gpu_alpha, double gpu_rCut, + double gpu_diElectric_1, double gpu_lambdaCoulomb, + bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_sigmaSq); +__device__ double +CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact, + int gpu_ewald, double gpu_alpha, + double gpu_rCut, double gpu_diElectric_1); +__device__ double CalcCoulombSwitchGPU(double distSq, int index, + double qi_qj_fact, double gpu_alpha, + int gpu_ewald, double gpu_rCut, double gpu_lambdaCoulomb, bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_sigmaSq); __device__ double CalcCoulombSwitchGPUNoLambda(double distSq, double qi_qj_fact, - int gpu_ewald, double gpu_alpha, double gpu_rCut); - + int gpu_ewald, double gpu_alpha, + double gpu_rCut); -//VDW Calculation +// VDW Calculation //*****************************************************************// __device__ double CalcEnParticleGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power); + double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power); __device__ double CalcEnParticleGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn); __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, uint sc_power); __device__ double CalcEnShiftGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, - double *gpu_n, double *gpu_epsilon_Cn, - double gpu_rCut); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut); __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, @@ -181,28 +138,23 @@ __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_rMaxSq, double *gpu_expConst); __device__ double CalcEnExp6GPUNoLambda(double distSq, int index, double *gpu_n, double *gpu_rMin, double *gpu_expConst); -__device__ double CalcEnSwitchMartiniGPU(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double gpu_rOn, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power); -__device__ double CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, - double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, - double gpu_rOn); +__device__ double +CalcEnSwitchMartiniGPU(double distSq, int index, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, + double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power); +__device__ double +CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, + double gpu_rCut, double gpu_rOn); __device__ double CalcEnSwitchGPU(double distSq, int index, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6, double sc_alpha, uint sc_power); __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index, - double *gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double gpu_rOn); + double *gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double gpu_rOn); #endif /*GOMC_CUDA*/ diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu index 164091427..8c92d9f26 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cu +++ b/src/GPU/CalculateEwaldCUDAKernel.cu @@ -558,14 +558,15 @@ __global__ void BoxForceReciprocalGPU( double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, - int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue, - int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz, - double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact, - double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction, - int *gpu_molIndex, double *gpu_lambdaCoulomb, double *gpu_cell_x, - double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, - double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, double axx, - double axy, double axz, int box, int atomCount) { + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, + double constValue, int imageSize, double *gpu_kx, double *gpu_ky, + double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, + double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, + bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb, + double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, + double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, + int *gpu_nonOrth, double axx, double axy, double axz, int box, + int atomCount) { __shared__ double shared_kvector[IMAGES_PER_BLOCK * 3]; int particleID = blockDim.x * blockIdx.x + threadIdx.x; int offset_vector_index = blockIdx.y * IMAGES_PER_BLOCK; @@ -631,7 +632,8 @@ __global__ void BoxForceReciprocalGPU( double qiqj = gpu_particleCharge[particleID] * gpu_particleCharge[otherParticle] * qqFactGPU; intraForce = qiqj * lambdaCoef * lambdaCoef / distSq; - intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); + intraForce *= + ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue); forceX -= intraForce * distVect.x; forceY -= intraForce * distVect.y; forceZ -= intraForce * distVect.z; diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh index 80062586b..638d5662c 100644 --- a/src/GPU/CalculateEwaldCUDAKernel.cuh +++ b/src/GPU/CalculateEwaldCUDAKernel.cuh @@ -2,189 +2,112 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CALCULATE_EWALD_CUDA_KERNEL_H #define CALCULATE_EWALD_CUDA_KERNEL_H #ifdef GOMC_CUDA +#include "VariablesCUDA.cuh" +#include "XYZArray.h" #include #include -#include "VariablesCUDA.cuh" #include -#include "XYZArray.h" -void CallBoxForceReciprocalGPU(VariablesCUDA *vars, - XYZArray &atomForceRec, - XYZArray &molForceRec, - const std::vector &particleCharge, - const std::vector &particleMol, - const std::vector &particleHasNoCharge, - const bool *particleUsed, - const std::vector &startMol, - const std::vector &lengthMol, - double constValue, - uint imageSize, - XYZArray const &molCoords, - BoxDimensions const &boxAxes, - int box); +void CallBoxForceReciprocalGPU( + VariablesCUDA *vars, XYZArray &atomForceRec, XYZArray &molForceRec, + const std::vector &particleCharge, + const std::vector &particleMol, + const std::vector &particleHasNoCharge, const bool *particleUsed, + const std::vector &startMol, const std::vector &lengthMol, + double constValue, uint imageSize, XYZArray const &molCoords, + BoxDimensions const &boxAxes, int box); -void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, - XYZArray const &coords, - double const *kx, - double const *ky, +void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords, + double const *kx, double const *ky, double const *kz, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double *prefact, - double *hsqr, - double &energyRecip, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double *prefact, double *hsqr, + double &energyRecip, uint box); -void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, - XYZArray const &coords, +void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecip, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double &energyRecip, uint box); -void CallMolReciprocalGPU(VariablesCUDA *vars, - XYZArray const ¤tCoords, +void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const &newCoords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecipNew, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + double &energyRecipNew, uint box); -//Calculate reciprocal term for lambdaNew and Old with same coordinates +// Calculate reciprocal term for lambdaNew and Old with same coordinates void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - double &energyRecipNew, - const double lambdaCoef, - uint box); + uint imageSize, double *sumRnew, + double *sumInew, double &energyRecipNew, + const double lambdaCoef, uint box); -void CallSwapReciprocalGPU(VariablesCUDA *vars, - XYZArray const &coords, +void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords, const std::vector &particleCharge, - uint imageSize, - double *sumRnew, - double *sumInew, - const bool insert, - double &energyRecipNew, - uint box); + uint imageSize, double *sumRnew, double *sumInew, + const bool insert, double &energyRecipNew, uint box); -void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, - uint imageSize, - double *sumRnew, - double *sumInew, - uint box); +void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, + double *sumRnew, double *sumInew, uint box); -__global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx, - double *gpu_aForceRecy, - double *gpu_aForceRecz, - double *gpu_mForceRecx, - double *gpu_mForceRecy, - double *gpu_mForceRecz, - double *gpu_particleCharge, - int *gpu_particleMol, - bool *gpu_particleHasNoCharge, - bool *gpu_particleUsed, - int *gpu_startMol, - int *gpu_lengthMol, - double *gpu_alpha, - double *gpu_alphaSq, - double constValue, - int imageSize, - double *gpu_kx, - double *gpu_ky, - double *gpu_kz, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_prefact, - double *gpu_sumRnew, - double *gpu_sumInew, - bool *gpu_isFraction, - int *gpu_molIndex, - double *gpu_lambdaCoulomb, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - int *gpu_nonOrth, - double axx, - double axy, - double axz, - int box, - int atomCount); +__global__ void BoxForceReciprocalGPU( + double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz, + double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz, + double *gpu_particleCharge, int *gpu_particleMol, + bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol, + int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, + double constValue, int imageSize, double *gpu_kx, double *gpu_ky, + double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z, + double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew, + bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb, + double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, + double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, + int *gpu_nonOrth, double axx, double axy, double axz, int box, + int atomCount); -__global__ void BoxReciprocalSumsGPU(double * gpu_x, - double * gpu_y, - double * gpu_z, - double * gpu_kx, - double * gpu_ky, - double * gpu_kz, - int atomNumber, - double * gpu_particleCharge, - double * gpu_sumRnew, - double * gpu_sumInew, +__global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y, + double *gpu_z, double *gpu_kx, + double *gpu_ky, double *gpu_kz, + int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, int imageSize); __global__ void MolReciprocalGPU(double *gpu_cx, double *gpu_cy, double *gpu_cz, double *gpu_nx, double *gpu_ny, double *gpu_nz, double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, - double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, + int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, + double *gpu_sumRref, double *gpu_sumIref, double *gpu_prefactRef, - double *gpu_energyRecipNew, - int imageSize); + double *gpu_energyRecipNew, int imageSize); -__global__ void ChangeLambdaMolReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, - double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, - double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_prefactRef, - double *gpu_energyRecipNew, - double lambdaCoef, - int imageSize); +__global__ void ChangeLambdaMolReciprocalGPU( + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_kx, double *gpu_ky, + double *gpu_kz, int atomNumber, double *gpu_particleCharge, + double *gpu_sumRnew, double *gpu_sumInew, double *gpu_sumRref, + double *gpu_sumIref, double *gpu_prefactRef, double *gpu_energyRecipNew, + double lambdaCoef, int imageSize); __global__ void SwapReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z, - double *gpu_kx, double *gpu_ky, double *gpu_kz, - int atomNumber, + double *gpu_kx, double *gpu_ky, + double *gpu_kz, int atomNumber, double *gpu_particleCharge, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_prefactRef, - const bool insert, - double *gpu_energyRecipNew, - int imageSize); + double *gpu_sumRnew, double *gpu_sumInew, + double *gpu_sumRref, double *gpu_sumIref, + double *gpu_prefactRef, const bool insert, + double *gpu_energyRecipNew, int imageSize); -__global__ void BoxReciprocalGPU(double *gpu_prefact, - double *gpu_sumRnew, - double *gpu_sumInew, - double *gpu_energyRecip, +__global__ void BoxReciprocalGPU(double *gpu_prefact, double *gpu_sumRnew, + double *gpu_sumInew, double *gpu_energyRecip, int imageSize); #endif /*GOMC_CUDA*/ diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu index 219a6cafb..79360471a 100644 --- a/src/GPU/CalculateForceCUDAKernel.cu +++ b/src/GPU/CalculateForceCUDAKernel.cu @@ -125,13 +125,14 @@ void CallBoxInterForceGPU( vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, - vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box], - vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], - vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth, - sc_coul, sc_sigma_6, sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, - vars->gpu_expConst, vars->gpu_molIndex, vars->gpu_lambdaVDW, - vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box); + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, + vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], + vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + vars->gpu_Invcell_z[box], vars->gpu_nonOrth, sc_coul, sc_sigma_6, + sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, + vars->gpu_molIndex, vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, + vars->gpu_isFraction, box); checkLastErrorCUDA(__FILE__, __LINE__); cudaDeviceSynchronize(); // ReduceSum // Virial of LJ @@ -302,10 +303,10 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, - vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, - vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth, - vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box], - vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], + vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, + vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1, + vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box], + vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_aForcex, vars->gpu_aForcey, vars->gpu_aForcez, vars->gpu_mForcex, vars->gpu_mForcey, vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha, sc_power, @@ -577,9 +578,9 @@ __global__ void BoxInterForceGPU( mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb); double pRF = CalcCoulombForceGPU( distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0], - gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0], - gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power, - lambdaCoulomb, gpu_count[0], kA, kB); + gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], + gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, + sc_power, lambdaCoulomb, gpu_count[0], kA, kB); gpu_rT11[threadID] += pRF * (virComponents.x * diff_com.x); gpu_rT22[threadID] += pRF * (virComponents.y * diff_com.y); @@ -599,25 +600,24 @@ __global__ void BoxInterForceGPU( } } -__global__ void -BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, - int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, - double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, - double3 halfAx, bool electrostatic, double *gpu_particleCharge, - int *gpu_particleKind, int *gpu_particleMol, double *gpu_REn, - double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, - int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, - int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, - double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, - double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, - double *gpu_aForcex, double *gpu_aForcey, double *gpu_aForcez, - double *gpu_mForcex, double *gpu_mForcey, double *gpu_mForcez, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst, - int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, - bool *gpu_isFraction, int box) { +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex, + double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex, + double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box) { __shared__ double shr_cutoff; __shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs; __shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex; @@ -708,9 +708,10 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, forces += CalcCoulombForceGPU( distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0], - gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], - gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, - sc_power, lambdaCoulomb, gpu_count[0], kA, kB); + gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], + gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, + sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, + kB); } } @@ -868,12 +869,14 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, - double gpu_alphaSq, int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { @@ -886,15 +889,18 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirParticleGPU(distSq, qi_qj, + gpu_ewald, gpu_alpha, + gpu_alphaSq); } } __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -909,13 +915,15 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { @@ -928,15 +936,17 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, + gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -950,13 +960,15 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } if (sc_coul) { double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq; @@ -968,15 +980,17 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq); + return gpu_lambdaCoulomb * CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, + gpu_alpha, gpu_alphaSq); } } __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq) { + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -990,13 +1004,14 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchMartiniGPU( - double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb) { + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, + gpu_diElectric_1); } if (sc_coul) { @@ -1009,21 +1024,20 @@ __device__ double CalcCoulombVirSwitchMartiniGPU( double softRsq = cbrt(softDist6); double correction = distSq / softRsq; return gpu_lambdaCoulomb * correction * correction * - CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, + gpu_diElectric_1); } else { - return gpu_lambdaCoulomb * - CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCut, gpu_diElectric_1); + return gpu_lambdaCoulomb * CalcCoulombVirSwitchMartiniGPU( + distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCut, gpu_diElectric_1); } } -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, - double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1) { +__device__ double +CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, + double gpu_rCut, double gpu_diElectric_1) { double dist = sqrt(distSq); if (gpu_ewald) { // M_2_SQRTPI is 2/sqrt(PI) @@ -1054,11 +1068,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, } __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, int index, - double gpu_sigmaSq, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, + int index, double gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double gpu_lambdaCoulomb) { if (gpu_lambdaCoulomb >= 0.999999) { return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, @@ -1079,7 +1093,8 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, gpu_alphaSq, gpu_rCut); } else { return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, - gpu_alpha, gpu_alphaSq, gpu_rCut); + gpu_alpha, gpu_alphaSq, + gpu_rCut); } } diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh index 072939c24..87aaca0c8 100644 --- a/src/GPU/CalculateForceCUDAKernel.cuh +++ b/src/GPU/CalculateForceCUDAKernel.cuh @@ -2,407 +2,244 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CALCULATE_FORCE_CUDA_KERNEL_H #define CALCULATE_FORCE_CUDA_KERNEL_H #ifdef GOMC_CUDA -#include -#include "XYZArray.h" #include "BoxDimensions.h" -#include "VariablesCUDA.cuh" -#include "ConstantDefinitionsCUDAKernel.cuh" #include "CalculateMinImageCUDAKernel.cuh" +#include "ConstantDefinitionsCUDAKernel.cuh" +#include "VariablesCUDA.cuh" +#include "XYZArray.h" +#include -void CallBoxForceGPU(VariablesCUDA *vars, - const std::vector &cellVector, +void CallBoxForceGPU(VariablesCUDA *vars, const std::vector &cellVector, const std::vector &cellStartIndex, - const std::vector > &neighborList, + const std::vector> &neighborList, const std::vector &mapParticleToCell, - XYZArray const &coords, - BoxDimensions const &boxAxes, + XYZArray const &coords, BoxDimensions const &boxAxes, bool electrostatic, const std::vector &particleCharge, const std::vector &particleKind, - const std::vector &particleMol, - double &REn, - double &LJEn, - double *aForcex, - double *aForcey, - double *aForcez, - double *mForcex, - double *mForcey, - double *mForcez, - int atomCount, - int molCount, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, + const std::vector &particleMol, double &REn, + double &LJEn, double *aForcex, double *aForcey, + double *aForcez, double *mForcex, double *mForcey, + double *mForcez, int atomCount, int molCount, bool sc_coul, + double sc_sigma_6, double sc_alpha, uint sc_power, uint const box); -void CallBoxInterForceGPU(VariablesCUDA *vars, - const std::vector &cellVector, - const std::vector &cellStartIndex, - const std::vector > &neighborList, - const std::vector &mapParticleToCell, - XYZArray const ¤tCoords, - XYZArray const ¤tCOM, - BoxDimensions const& boxAxes, - bool electrostatic, - const std::vector &particleCharge, - const std::vector &particleKind, - const std::vector &particleMol, - double &rT11, - double &rT12, - double &rT13, - double &rT22, - double &rT23, - double &rT33, - double &vT11, - double &vT12, - double &vT13, - double &vT22, - double &vT23, - double &vT33, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - uint const box); +void CallBoxInterForceGPU( + VariablesCUDA *vars, const std::vector &cellVector, + const std::vector &cellStartIndex, + const std::vector> &neighborList, + const std::vector &mapParticleToCell, XYZArray const ¤tCoords, + XYZArray const ¤tCOM, BoxDimensions const &boxAxes, + bool electrostatic, const std::vector &particleCharge, + const std::vector &particleKind, const std::vector &particleMol, + double &rT11, double &rT12, double &rT13, double &rT22, double &rT23, + double &rT33, double &vT11, double &vT12, double &vT13, double &vT22, + double &vT23, double &vT33, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, uint const box); -void CallVirialReciprocalGPU(VariablesCUDA *vars, - XYZArray const ¤tCoords, +void CallVirialReciprocalGPU(VariablesCUDA *vars, XYZArray const ¤tCoords, XYZArray const ¤tCOMDiff, const std::vector &particleCharge, - double &rT11, - double &rT12, - double &rT13, - double &rT22, - double &rT23, - double &rT33, - uint imageSize, - double constVal, - uint box); + double &rT11, double &rT12, double &rT13, + double &rT22, double &rT23, double &rT33, + uint imageSize, double constVal, uint box); -__global__ void BoxForceGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - int atomNumber, - int *gpu_mapParticleToCell, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_REn, - double *gpu_LJEn, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - double *gpu_alphaSq, - int *gpu_ewald, - double *gpu_diElectric_1, - int *gpu_nonOrth, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - double *gpu_aForcex, - double *gpu_aForcey, - double *gpu_aForcez, - double *gpu_mForcex, - double *gpu_mForcey, - double *gpu_mForcez, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x, + double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, + double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex, + double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex, + double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box); -__global__ void BoxInterForceGPU(int *gpu_cellStartIndex, - int *gpu_cellVector, - int *gpu_neighborList, - int numberOfCells, - int atomNumber, - int *gpu_mapParticleToCell, - double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_comx, - double *gpu_comy, - double *gpu_comz, - double3 axis, - double3 halfAx, - bool electrostatic, - double *gpu_particleCharge, - int *gpu_particleKind, - int *gpu_particleMol, - double *gpu_rT11, - double *gpu_rT12, - double *gpu_rT13, - double *gpu_rT22, - double *gpu_rT23, - double *gpu_rT33, - double *gpu_vT11, - double *gpu_vT12, - double *gpu_vT13, - double *gpu_vT22, - double *gpu_vT23, - double *gpu_vT33, - double *gpu_sigmaSq, - double *gpu_epsilon_Cn, - double *gpu_n, - int *gpu_VDW_Kind, - int *gpu_isMartini, - int *gpu_count, - double *gpu_rCut, - double *gpu_rCutCoulomb, - double *gpu_rCutLow, - double *gpu_rOn, - double *gpu_alpha, - double *gpu_alphaSq, - int *gpu_ewald, - double *gpu_diElectric_1, - double *gpu_cell_x, - double *gpu_cell_y, - double *gpu_cell_z, - double *gpu_Invcell_x, - double *gpu_Invcell_y, - double *gpu_Invcell_z, - int *gpu_nonOrth, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst, - int *gpu_molIndex, - double *gpu_lambdaVDW, - double *gpu_lambdaCoulomb, - bool *gpu_isFraction, - int box); +__global__ void BoxInterForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comx, + double *gpu_comy, double *gpu_comz, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, + double *gpu_rT22, double *gpu_rT23, double *gpu_rT33, double *gpu_vT11, + double *gpu_vT12, double *gpu_vT13, double *gpu_vT22, double *gpu_vT23, + double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, + double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, + double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, + double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, + double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, + double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, + double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, + double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW, + double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box); -__global__ void VirialReciprocalGPU(double *gpu_x, - double *gpu_y, - double *gpu_z, - double *gpu_comDx, - double *gpu_comDy, - double *gpu_comDz, - double *gpu_kxRef, - double *gpu_kyRef, - double *gpu_kzRef, - double *gpu_prefactRef, - double *gpu_hsqrRef, - double *gpu_sumRref, - double *gpu_sumIref, - double *gpu_particleCharge, - double *gpu_rT11, - double *gpu_rT12, - double *gpu_rT13, - double *gpu_rT22, - double *gpu_rT23, - double *gpu_rT33, - double constVal, - uint imageSize, - uint atomNumber); +__global__ void VirialReciprocalGPU( + double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comDx, + double *gpu_comDy, double *gpu_comDz, double *gpu_kxRef, double *gpu_kyRef, + double *gpu_kzRef, double *gpu_prefactRef, double *gpu_hsqrRef, + double *gpu_sumRref, double *gpu_sumIref, double *gpu_particleCharge, + double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, double *gpu_rT22, + double *gpu_rT23, double *gpu_rT33, double constVal, uint imageSize, + uint atomNumber); -__device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, - double *gpu_sigmaSq, - double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, - double gpu_rOn, - int gpu_isMartini, - int gpu_VDW_Kind, - int gpu_count, - double gpu_lambdaVDW, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double *gpu_rMin, - double *gpu_rMaxSq, - double *gpu_expConst); +__device__ double +CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, + double gpu_rOn, int gpu_isMartini, int gpu_VDW_Kind, + int gpu_count, double gpu_lambdaVDW, double sc_sigma_6, + double sc_alpha, uint sc_power, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst); -//ElectroStatic Calculation +// ElectroStatic Calculation //**************************************************************// __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, + double sc_sigma_6, double sc_alpha, + uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); -__device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - int index, double gpu_sigmaSq, - bool sc_coul, double sc_sigma_6, - double sc_alpha, uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); +__device__ double +CalcCoulombVirExp6GPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, double gpu_lambdaCoulomb); __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq); -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1, - int index, - double gpu_sigmaSq, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb); -__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, - int gpu_ewald, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, - double gpu_diElectric_1); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq); +__device__ double CalcCoulombVirSwitchMartiniGPU( + double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index, + double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaCoulomb); +__device__ double +CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald, + double gpu_alpha, double gpu_alphaSq, + double gpu_rCut, double gpu_diElectric_1); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut, int index, - double gpu_sigmaSq, bool sc_coul, - double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut, + int index, double gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, + double sc_alpha, uint sc_power, + double gpu_lambdaCoulomb); __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj, - int gpu_ewald, double gpu_alpha, double gpu_alphaSq, - double gpu_rCut); + int gpu_ewald, double gpu_alpha, + double gpu_alphaSq, double gpu_rCut); -//VDW Calculation +// VDW Calculation //*****************************************************************// __device__ double CalcVirParticleGPU(double distSq, int index, double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double sc_sigma_6, + double *gpu_epsilon_Cn, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirParticleGPU(double distSq, int index, double gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn); -__device__ double CalcVirShiftGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, +__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn, double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); -__device__ double CalcVirShiftGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn); -__device__ double CalcVirExp6GPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_rMin, double *gpu_rMaxSq, - double *gpu_expConst, + uint sc_power, double gpu_lambdaVDW); +__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_epsilon_Cn); +__device__ double CalcVirExp6GPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_n, double *gpu_rMin, + double *gpu_rMaxSq, double *gpu_expConst, double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); + uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirExp6GPU(double distSq, int index, double *gpu_n, double *gpu_rMin, double *gpu_expConst); __device__ double CalcVirSwitchMartiniGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double rOn, - double sc_sigma_6, double sc_alpha, - uint sc_power, - double gpu_lambdaVDW); + double gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double rOn, + double sc_sigma_6, double sc_alpha, + uint sc_power, double gpu_lambdaVDW); __device__ double CalcVirSwitchMartiniGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_n, - double *gpu_epsilon_Cn, - double gpu_rCut, double rOn); -__device__ double CalcVirSwitchGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, double gpu_rCut, - double gpu_rOn, + double gpu_sigmaSq, double *gpu_n, + double *gpu_epsilon_Cn, + double gpu_rCut, double rOn); +__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_epsilon_Cn, double *gpu_n, + double gpu_rCut, double gpu_rOn, double sc_sigma_6, double sc_alpha, uint sc_power, double gpu_lambdaVDW); -__device__ double CalcVirSwitchGPU(double distSq, int index, - double gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, double gpu_rCut, - double gpu_rOn); - +__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq, + double *gpu_epsilon_Cn, double *gpu_n, + double gpu_rCut, double gpu_rOn); // Have to move the implementation for some functions here // since CUDA doesn't allow __global__ to call __device__ // from different files // Wanted to call CalcCoulombForceGPU() from CalculateEnergyCUDAKernel.cu file -__device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj, - int gpu_VDW_Kind, int gpu_ewald, - int gpu_isMartini, - double gpu_alpha, double gpu_alphaSq, - double gpu_rCutCoulomb, - double gpu_diElectric_1, - double *gpu_sigmaSq, - bool sc_coul, - double sc_sigma_6, - double sc_alpha, - uint sc_power, - double gpu_lambdaCoulomb, - int gpu_count, int kind1, - int kind2) -{ - if((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) { +__device__ inline double CalcCoulombForceGPU( + double distSq, double qi_qj, int gpu_VDW_Kind, int gpu_ewald, + int gpu_isMartini, double gpu_alpha, double gpu_alphaSq, + double gpu_rCutCoulomb, double gpu_diElectric_1, double *gpu_sigmaSq, + bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, + double gpu_lambdaCoulomb, int gpu_count, int kind1, int kind2) { + if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) { return 0.0; } int index = FlatIndexGPU(kind1, kind2, gpu_count); - if(gpu_VDW_Kind == GPU_VDW_STD_KIND) { - return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { - return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { - return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index, - gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, - sc_power, gpu_lambdaCoulomb); - } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { - return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCutCoulomb, gpu_diElectric_1, - index, gpu_sigmaSq[index], sc_coul, - sc_sigma_6, sc_alpha, sc_power, - gpu_lambdaCoulomb); + if (gpu_VDW_Kind == GPU_VDW_STD_KIND) { + return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) { + return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_EXP6_KIND) { + return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, index, gpu_sigmaSq[index], + sc_coul, sc_sigma_6, sc_alpha, sc_power, + gpu_lambdaCoulomb); + } else if (gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) { + return CalcCoulombVirSwitchMartiniGPU( + distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb, + gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, + sc_alpha, sc_power, gpu_lambdaCoulomb); } else - return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, - gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul, - sc_sigma_6, sc_alpha, sc_power, - gpu_lambdaCoulomb); + return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, + gpu_alphaSq, gpu_rCutCoulomb, index, + gpu_sigmaSq[index], sc_coul, sc_sigma_6, + sc_alpha, sc_power, gpu_lambdaCoulomb); } - #endif /*GOMC_CUDA*/ #endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/ diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh index 3584e46c2..c5cabf281 100644 --- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh +++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh @@ -2,17 +2,18 @@ GPU OPTIMIZED MONTE CARLO (GOMC) 2.75 Copyright (C) 2022 GOMC Group A copy of the MIT License can be found in License.txt -along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H #define CONSTANT_DEFINITIONS_CUDA_KERNEL_H #ifdef GOMC_CUDA -#include -#include +#include "EnsemblePreprocessor.h" #include "GeomLib.h" #include "VariablesCUDA.cuh" -#include "EnsemblePreprocessor.h" +#include +#include #define GPU_VDW_STD_KIND 0 #define GPU_VDW_SHIFT_KIND 1 @@ -21,12 +22,12 @@ along with this program, also can be found at . +along with this program, also can be found at +. ********************************************************************************/ #pragma once #ifdef GOMC_CUDA -#include -#include -#include #include "EnsemblePreprocessor.h" #include "NumLib.h" +#include +#include +#include -//Need a separate float constant for device code with the MSVC compiler -//See CUDA Programming Guide section I.4.13 for details +// Need a separate float constant for device code with the MSVC compiler +// See CUDA Programming Guide section I.4.13 for details static const __device__ double qqFactGPU = num::qqFact; -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) -{ +#define gpuErrchk(ans) \ + { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, + bool abort = true) { if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) + exit(code); } } -inline void checkLastErrorCUDA(const char *file, int line) -{ +inline void checkLastErrorCUDA(const char *file, int line) { cudaError_t code = cudaGetLastError(); if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, + line); exit(code); } } -inline void printFreeMemory() -{ - size_t free_byte ; - size_t total_byte ; - cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ; +inline void printFreeMemory() { + size_t free_byte; + size_t total_byte; + cudaError_t cuda_status = cudaMemGetInfo(&free_byte, &total_byte); - if ( cudaSuccess != cuda_status ) { + if (cudaSuccess != cuda_status) { printf("Error: cudaMemGetInfo fails, %s \n", - cudaGetErrorString(cuda_status) ); + cudaGetErrorString(cuda_status)); exit(1); } - double free_db = (double)free_byte ; - double total_db = (double)total_byte ; - double used_db = total_db - free_db ; + double free_db = (double)free_byte; + double total_db = (double)total_byte; + double used_db = total_db - free_db; printf("GPU memory usage: used = %f, free = %f MB, total = %f MB\n", - used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, total_db / 1024.0 / 1024.0); + used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, + total_db / 1024.0 / 1024.0); } -class VariablesCUDA -{ +class VariablesCUDA { public: - VariablesCUDA() - { + VariablesCUDA() { gpu_sigmaSq = NULL; gpu_epsilon_Cn = NULL; gpu_n = NULL; @@ -94,7 +96,7 @@ public: int *gpu_VDW_Kind; int *gpu_isMartini; int *gpu_count; - int *gpu_startAtomIdx; //start atom index of the molecule + int *gpu_startAtomIdx; // start atom index of the molecule double *gpu_rCut, *gpu_rCutSq; double *gpu_rCutCoulomb, *gpu_rCutCoulombSq; double *gpu_rCutLow;