diff --git a/src/Ewald.cpp b/src/Ewald.cpp
index 00b756868..37dba5695 100644
--- a/src/Ewald.cpp
+++ b/src/Ewald.cpp
@@ -1610,9 +1610,8 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords,
   CallBoxForceReciprocalGPU(ff.particles->getCUDAVars(), atomForceRec,
                             molForceRec, particleCharge, particleMol,
-                            particleUsed, startMol, lengthMol, ff.alpha[box],
-                            ff.alphaSq[box], constValue, imageSizeRef[box],
-                            molCoords, currentAxes, box);
+                            particleUsed, startMol, lengthMol, constValue,
+                            imageSizeRef[box], molCoords, currentAxes, box);
 #else
   // molecule iterator
   MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(box);
diff --git a/src/FFParticle.cpp b/src/FFParticle.cpp
index bc43b0fa5..76004aabc 100644
--- a/src/FFParticle.cpp
+++ b/src/FFParticle.cpp
@@ -88,9 +88,10 @@ void FFParticle::Init(ff_setup::Particle const &mie,
   double diElectric_1 = 1.0 / forcefield.dielectric;
   InitGPUForceField(*varCUDA, sigmaSq, epsilon_cn, n, forcefield.vdwKind,
                     forcefield.isMartini, count, forcefield.rCut,
-                    forcefield.rCutCoulomb, forcefield.rCutLow,
-                    forcefield.rswitch, forcefield.alpha, forcefield.ewald,
-                    diElectric_1);
+                    forcefield.rCutSq, forcefield.rCutCoulomb,
+                    forcefield.rCutCoulombSq, forcefield.rCutLow,
+                    forcefield.rswitch, forcefield.alpha, forcefield.alphaSq,
+                    forcefield.ewald, diElectric_1);
 #endif
 }
diff --git a/src/Forcefield.h b/src/Forcefield.h
index 85ddcd5d5..64ac8f6ba 100644
--- a/src/Forcefield.h
+++ b/src/Forcefield.h
@@ -54,12 +54,12 @@ class Forcefield {
   double tolerance;  // Ewald sum terms
   double rswitch;    // Switch distance
   double dielectric; // dielectric for martini
-  double scaling_14; //!< Scaling factor for 1-4 pairs' ewald interactions
+  double scaling_14; //!< Scaling factor for 1-4 pairs' Ewald interactions
   double sc_alpha;   // Free energy parameter
   double sc_sigma, sc_sigma_6;  // Free energy parameter
 
   bool OneThree, OneFour, OneN; // To include 1-3, 1-4 and more interaction
-  bool electrostatic, ewald;    // To consider columb interaction
+  bool electrostatic, ewald;    // To consider Coulomb interactions
   bool vdwGeometricSigma;       // For sigma combining rule
   bool isMartini;
   bool exp6;
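Note: the forcefield now carries each cutoff alongside its square (rCutSq, rCutCoulombSq, alphaSq), and FFParticle::Init forwards both to InitGPUForceField so device code can compare squared distances directly. A minimal host-side sketch of the pattern, with hypothetical names (CutoffCache is not part of GOMC):

// Hypothetical illustration: square the constants once, reuse them everywhere.
struct CutoffCache {
  double rCut, rCutSq;
  double rCutCoulomb, rCutCoulombSq;
  double alpha, alphaSq;
  CutoffCache(double rc, double rcc, double a)
      : rCut(rc), rCutSq(rc * rc), rCutCoulomb(rcc), rCutCoulombSq(rcc * rcc),
        alpha(a), alphaSq(a * a) {}
};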
diff --git a/src/GPU/CalculateEnergyCUDAKernel.cu b/src/GPU/CalculateEnergyCUDAKernel.cu
index 84750b946..b959987f4 100644
--- a/src/GPU/CalculateEnergyCUDAKernel.cu
+++ b/src/GPU/CalculateEnergyCUDAKernel.cu
@@ -75,7 +75,8 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
       vars->gpu_particleCharge, vars->gpu_particleKind, vars->gpu_particleMol,
       vars->gpu_REn, vars->gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn,
       vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count,
-      vars->gpu_rCut, vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn,
+      vars->gpu_rCut, vars->gpu_rCutSq, vars->gpu_rCutCoulomb,
+      vars->gpu_rCutCoulombSq, vars->gpu_rCutLow, vars->gpu_rOn,
       vars->gpu_alpha, vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth,
       vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
       vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
@@ -115,17 +116,18 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
             int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
             double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
             int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
-            double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
-            double *gpu_rOn, double *gpu_alpha, int *gpu_ewald,
-            double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x,
-            double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
-            double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul,
-            double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin,
+            double *gpu_rCut, double *gpu_rCutSq, double *gpu_rCutCoulomb,
+            double *gpu_rCutCoulombSq, double *gpu_rCutLow, double *gpu_rOn,
+            double *gpu_alpha, int *gpu_ewald, double *gpu_diElectric_1,
+            int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y,
+            double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+            double *gpu_Invcell_z, bool sc_coul, double sc_sigma_6,
+            double sc_alpha, uint sc_power, double *gpu_rMin,
             double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex,
             double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
             bool *gpu_isFraction, int box) {
-  __shared__ double shr_cutoff;
+  __shared__ double shr_cutoffSq;
   __shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs;
   __shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex;
   __shared__ bool shr_sameCell;
@@ -158,7 +160,7 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
     // Total number of pairs
     shr_numberOfPairs =
         shr_particlesInsideCurrentCell * particlesInsideNeighboringCell;
-    shr_cutoff = fmax(gpu_rCut[0], gpu_rCutCoulomb[box]);
+    shr_cutoffSq = fmax(gpu_rCutSq[0], gpu_rCutCoulombSq[box]);
   }
   __syncthreads();
@@ -182,18 +184,19 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
     double distSq = 0.0;
     if (InRcutGPU(distSq, gpu_x, gpu_y, gpu_z, currentParticle,
-                  neighborParticle, axis, halfAx, shr_cutoff, gpu_nonOrth[0],
-                  gpu_cell_x, gpu_cell_y, gpu_cell_z, gpu_Invcell_x,
-                  gpu_Invcell_y, gpu_Invcell_z)) {
+                  neighborParticle, axis, halfAx, shr_cutoffSq,
+                  gpu_nonOrth[0], gpu_cell_x, gpu_cell_y, gpu_cell_z,
+                  gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z)) {
       int kA = gpu_particleKind[currentParticle];
       int kB = gpu_particleKind[neighborParticle];
       double lambdaVDW = DeviceGetLambdaVDW(mA, mB, box, gpu_isFraction,
                                             gpu_molIndex, gpu_lambdaVDW);
-      LJEn += CalcEnGPU(
-          distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn, gpu_VDW_Kind[0],
-          gpu_isMartini[0], gpu_rCut[0], gpu_rOn[0], gpu_count[0], lambdaVDW,
-          sc_sigma_6, sc_alpha, sc_power, gpu_rMin, gpu_rMaxSq, gpu_expConst);
+      LJEn += CalcEnGPU(distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn,
+                        gpu_VDW_Kind[0], gpu_isMartini[0], gpu_rCut[0],
+                        gpu_rCutSq[0], gpu_rOn[0], gpu_count[0], lambdaVDW,
+                        sc_sigma_6, sc_alpha, sc_power, gpu_rMin, gpu_rMaxSq,
+                        gpu_expConst);
 
       if (electrostatic) {
         double qi_qj_fact = gpu_particleCharge[currentParticle] *
@@ -202,11 +205,12 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
         qi_qj_fact *= qqFactGPU;
         double lambdaCoulomb = DeviceGetLambdaCoulomb(
             mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb);
-        REn += CalcCoulombGPU(
-            distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0], gpu_ewald[0],
-            gpu_VDW_Kind[0], gpu_alpha[box], gpu_rCutCoulomb[box],
-            gpu_isMartini[0], gpu_diElectric_1[0], lambdaCoulomb, sc_coul,
-            sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0]);
+        REn += CalcCoulombGPU(distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0],
+                              gpu_ewald[0], gpu_VDW_Kind[0], gpu_alpha[box],
+                              gpu_rCutCoulomb[box], gpu_rCutCoulombSq[box],
+                              gpu_isMartini[0], gpu_diElectric_1[0],
+                              lambdaCoulomb, sc_coul, sc_sigma_6, sc_alpha,
+                              sc_power, gpu_sigmaSq, gpu_count[0]);
       }
     }
   }
@@ -238,14 +242,13 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
     }
   }
 }
 
-__device__ double
-CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact,
-               double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind,
-               double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini,
-               double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
-               double sc_sigma_6, double sc_alpha, uint sc_power,
-               double *gpu_sigmaSq, int gpu_count) {
-  if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) {
+__device__ double CalcCoulombGPU(
+    double distSq, int kind1, int kind2, double qi_qj_fact, double gpu_rCutLow,
+    int gpu_ewald, int gpu_VDW_Kind, double gpu_alpha, double gpu_rCutCoulomb,
+    double gpu_rCutCoulombSq, int gpu_isMartini, double gpu_diElectric_1,
+    double gpu_lambdaCoulomb, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double *gpu_sigmaSq, int gpu_count) {
+  if (gpu_rCutCoulombSq < distSq) {
     return 0.0;
   }
@@ -265,23 +268,24 @@ CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact,
   } else if (gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) {
     return CalcCoulombSwitchMartiniGPU(
         distSq, index, qi_qj_fact, gpu_ewald, gpu_alpha, gpu_rCutCoulomb,
-        gpu_diElectric_1, gpu_lambdaCoulomb, sc_coul, sc_sigma_6, sc_alpha,
-        sc_power, gpu_sigmaSq);
+        gpu_rCutCoulombSq, gpu_diElectric_1, gpu_lambdaCoulomb, sc_coul,
+        sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq);
   } else
     return CalcCoulombSwitchGPU(distSq, index, qi_qj_fact, gpu_alpha, gpu_ewald,
-                                gpu_rCutCoulomb, gpu_lambdaCoulomb, sc_coul,
-                                sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq);
+                                gpu_rCutCoulomb, gpu_rCutCoulombSq,
+                                gpu_lambdaCoulomb, sc_coul, sc_sigma_6,
+                                sc_alpha, sc_power, gpu_sigmaSq);
 }
 
 __device__ double CalcEnGPU(double distSq, int kind1, int kind2,
                             double *gpu_sigmaSq, double *gpu_n,
                             double *gpu_epsilon_Cn, int gpu_VDW_Kind,
-                            int gpu_isMartini, double gpu_rCut, double gpu_rOn,
-                            int gpu_count, double gpu_lambdaVDW,
-                            double sc_sigma_6, double sc_alpha, uint sc_power,
-                            double *gpu_rMin, double *gpu_rMaxSq,
-                            double *gpu_expConst) {
-  if ((gpu_rCut * gpu_rCut) < distSq) {
+                            int gpu_isMartini, double gpu_rCut,
+                            double gpu_rCutSq, double gpu_rOn, int gpu_count,
+                            double gpu_lambdaVDW, double sc_sigma_6,
+                            double sc_alpha, uint sc_power, double *gpu_rMin,
+                            double *gpu_rMaxSq, double *gpu_expConst) {
+  if (gpu_rCutSq < distSq) {
     return 0.0;
   }
@@ -291,7 +295,7 @@ __device__ double CalcEnGPU(double distSq, int kind1, int kind2,
                             gpu_lambdaVDW, sc_sigma_6, sc_alpha, sc_power);
   } else if (gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) {
     return CalcEnShiftGPU(distSq, index, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn,
-                          gpu_rCut, gpu_lambdaVDW, sc_sigma_6, sc_alpha,
+                          gpu_rCutSq, gpu_lambdaVDW, sc_sigma_6, sc_alpha,
                           sc_power);
   } else if (gpu_VDW_Kind == GPU_VDW_EXP6_KIND) {
     return CalcEnExp6GPU(distSq, index, gpu_sigmaSq, gpu_n, gpu_lambdaVDW,
@@ -303,7 +307,7 @@ __device__ double CalcEnGPU(double distSq, int kind1, int kind2,
                                  gpu_lambdaVDW, sc_sigma_6, sc_alpha, sc_power);
   } else
     return CalcEnSwitchGPU(distSq, index, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn,
-                           gpu_rCut, gpu_rOn, gpu_lambdaVDW, sc_sigma_6,
+                           gpu_rCutSq, gpu_rOn, gpu_lambdaVDW, sc_sigma_6,
                            sc_alpha, sc_power);
 }
@@ -427,15 +431,15 @@ __device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact,
   return qi_qj_fact * value / dist;
 }
 
-__device__ double
-CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact,
-                            int gpu_ewald, double gpu_alpha, double gpu_rCut,
-                            double gpu_diElectric_1, double gpu_lambdaCoulomb,
-                            bool sc_coul, double sc_sigma_6, double sc_alpha,
-                            uint sc_power, double *gpu_sigmaSq) {
+__device__ double CalcCoulombSwitchMartiniGPU(
+    double distSq, int index, double qi_qj_fact, int gpu_ewald,
+    double gpu_alpha, double gpu_rCut, double gpu_rCutSq,
+    double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
+    double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_sigmaSq) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombSwitchMartiniGPUNoLambda(
-        distSq, qi_qj_fact, gpu_ewald, gpu_alpha, gpu_rCut, gpu_diElectric_1);
+    return CalcCoulombSwitchMartiniGPUNoLambda(distSq, qi_qj_fact, gpu_ewald,
+                                               gpu_alpha, gpu_rCut, gpu_rCutSq,
+                                               gpu_diElectric_1);
   }
 
   if (sc_coul) {
@@ -449,18 +453,17 @@ CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact,
     double softRsq = cbrt(softDist6);
     return gpu_lambdaCoulomb * CalcCoulombSwitchMartiniGPUNoLambda(
                                    softRsq, qi_qj_fact, gpu_ewald, gpu_alpha,
-                                   gpu_rCut, gpu_diElectric_1);
+                                   gpu_rCut, gpu_rCutSq, gpu_diElectric_1);
   } else {
     return gpu_lambdaCoulomb * CalcCoulombSwitchMartiniGPUNoLambda(
                                    distSq, qi_qj_fact, gpu_ewald, gpu_alpha,
-                                   gpu_rCut, gpu_diElectric_1);
+                                   gpu_rCut, gpu_rCutSq, gpu_diElectric_1);
   }
 }
 
-__device__ double
-CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact,
-                                    int gpu_ewald, double gpu_alpha,
-                                    double gpu_rCut, double gpu_diElectric_1) {
+__device__ double CalcCoulombSwitchMartiniGPUNoLambda(
+    double distSq, double qi_qj_fact, int gpu_ewald, double gpu_alpha,
+    double gpu_rCut, double gpu_rCutSq, double gpu_diElectric_1) {
   if (gpu_ewald) {
     double dist = sqrt(distSq);
     double value = gpu_alpha * dist;
@@ -481,10 +484,10 @@ CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact,
     //                 B1 / 4.0 * pow(gpu_rCut, 4);
 
     // Optimized computation
-    double A1 = -5.0 / (gpu_rCut * gpu_rCut * gpu_rCut * gpu_rCut);
-    double B1 = 4.0 / (gpu_rCut * gpu_rCut * gpu_rCut * gpu_rCut * gpu_rCut);
-    double C1 = 1.0 / gpu_rCut - A1 / 3.0 * gpu_rCut * gpu_rCut * gpu_rCut -
-                B1 / 4.0 * gpu_rCut * gpu_rCut * gpu_rCut * gpu_rCut;
+    double A1 = -5.0 / (gpu_rCutSq * gpu_rCutSq);
+    double B1 = 4.0 / (gpu_rCutSq * gpu_rCutSq * gpu_rCut);
+    double C1 = 1.0 / gpu_rCut - A1 / 3.0 * gpu_rCutSq * gpu_rCut -
+                B1 / 4.0 * gpu_rCutSq * gpu_rCutSq;
 
     double coul = -(A1 / 3.0) * rij_ronCoul_3 - (B1 / 4.0) * rij_ronCoul_4 - C1;
     return qi_qj_fact * gpu_diElectric_1 * (1.0 / dist + coul);
@@ -494,12 +497,13 @@ CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact,
 __device__ double CalcCoulombSwitchGPU(double distSq, int index,
                                        double qi_qj_fact, double gpu_alpha,
                                        int gpu_ewald, double gpu_rCut,
+                                       double gpu_rCutSq,
                                        double gpu_lambdaCoulomb, bool sc_coul,
                                        double sc_sigma_6, double sc_alpha,
                                        uint sc_power, double *gpu_sigmaSq) {
   if (gpu_lambdaCoulomb >= 0.999999) {
     return CalcCoulombSwitchGPUNoLambda(distSq, qi_qj_fact, gpu_ewald,
-                                        gpu_alpha, gpu_rCut);
+                                        gpu_alpha, gpu_rCut, gpu_rCutSq);
   }
 
   if (sc_coul) {
@@ -513,24 +517,24 @@ __device__ double CalcCoulombSwitchGPU(double distSq, int index,
     double softRsq = cbrt(softDist6);
     return gpu_lambdaCoulomb *
            CalcCoulombSwitchGPUNoLambda(softRsq, qi_qj_fact, gpu_ewald,
-                                        gpu_alpha, gpu_rCut);
+                                        gpu_alpha, gpu_rCut, gpu_rCutSq);
   } else {
     return gpu_lambdaCoulomb *
            CalcCoulombSwitchGPUNoLambda(distSq, qi_qj_fact, gpu_ewald,
-                                        gpu_alpha, gpu_rCut);
+                                        gpu_alpha, gpu_rCut, gpu_rCutSq);
   }
 }
 
 __device__ double CalcCoulombSwitchGPUNoLambda(double distSq,
                                                double qi_qj_fact, int gpu_ewald,
                                                double gpu_alpha,
-                                               double gpu_rCut) {
+                                               double gpu_rCut,
+                                               double gpu_rCutSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     double value = gpu_alpha * dist;
     return qi_qj_fact * (1.0 - erf(value)) / dist;
   } else {
-    double rCutSq = gpu_rCut * gpu_rCut;
-    double switchVal = distSq / rCutSq - 1.0;
+    double switchVal = distSq / gpu_rCutSq - 1.0;
     switchVal *= switchVal;
     return qi_qj_fact * switchVal / dist;
   }
@@ -571,12 +575,12 @@ __device__ double CalcEnParticleGPUNoLambda(double distSq, int index,
 
 __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq,
                                  double *gpu_n, double *gpu_epsilon_Cn,
-                                 double gpu_rCut, double gpu_lambdaVDW,
+                                 double gpu_rCutSq, double gpu_lambdaVDW,
                                  double sc_sigma_6, double sc_alpha,
                                  uint sc_power) {
   if (gpu_lambdaVDW >= 0.999999) {
     return CalcEnShiftGPUNoLambda(distSq, index, gpu_sigmaSq, gpu_n,
-                                  gpu_epsilon_Cn, gpu_rCut);
+                                  gpu_epsilon_Cn, gpu_rCutSq);
   }
 
   double sigma6 = gpu_sigmaSq[index] * gpu_sigmaSq[index] * gpu_sigmaSq[index];
@@ -588,19 +592,19 @@ __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq,
   return gpu_lambdaVDW *
          CalcEnShiftGPUNoLambda(softRsq, index, gpu_sigmaSq, gpu_n,
                                 gpu_epsilon_Cn,
-                                gpu_rCut);
+                                gpu_rCutSq);
 }
 
 __device__ double CalcEnShiftGPUNoLambda(double distSq, int index,
                                          double *gpu_sigmaSq, double *gpu_n,
                                          double *gpu_epsilon_Cn,
-                                         double gpu_rCut) {
+                                         double gpu_rCutSq) {
   double rRat2 = gpu_sigmaSq[index] / distSq;
   double rRat4 = rRat2 * rRat2;
   double attract = rRat4 * rRat2;
   double repulse = pow(rRat2, gpu_n[index] * 0.5);
 
-  double shiftRRat2 = gpu_sigmaSq[index] / (gpu_rCut * gpu_rCut);
+  double shiftRRat2 = gpu_sigmaSq[index] / gpu_rCutSq;
   double shiftRRat4 = shiftRRat2 * shiftRRat2;
   double shiftAttract = shiftRRat4 * shiftRRat2;
   double shiftRepulse = pow(shiftRRat2, gpu_n[index] * 0.5);
@@ -737,12 +741,12 @@ CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, double *gpu_sigmaSq,
 
 __device__ double CalcEnSwitchGPU(double distSq, int index, double *gpu_sigmaSq,
                                   double *gpu_n, double *gpu_epsilon_Cn,
-                                  double gpu_rCut, double gpu_rOn,
+                                  double gpu_rCutSq, double gpu_rOn,
                                   double gpu_lambdaVDW, double sc_sigma_6,
                                   double sc_alpha, uint sc_power) {
   if (gpu_lambdaVDW >= 0.999999) {
     return CalcEnSwitchGPUNoLambda(distSq, index, gpu_sigmaSq, gpu_n,
-                                   gpu_epsilon_Cn, gpu_rCut, gpu_rOn);
+                                   gpu_epsilon_Cn, gpu_rCutSq, gpu_rOn);
   }
   double sigma6 = gpu_sigmaSq[index] * gpu_sigmaSq[index] * gpu_sigmaSq[index];
   sigma6 = max(sigma6, sc_sigma_6);
@@ -753,17 +757,16 @@ __device__ double CalcEnSwitchGPU(double distSq, int index, double *gpu_sigmaSq,
 
   return gpu_lambdaVDW *
          CalcEnSwitchGPUNoLambda(softRsq, index, gpu_sigmaSq, gpu_n,
                                  gpu_epsilon_Cn,
-                                 gpu_rCut, gpu_rOn);
+                                 gpu_rCutSq, gpu_rOn);
 }
 
 __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index,
                                           double *gpu_sigmaSq, double *gpu_n,
                                           double *gpu_epsilon_Cn,
-                                          double gpu_rCut, double gpu_rOn) {
-  double rCutSq = gpu_rCut * gpu_rCut;
+                                          double gpu_rCutSq, double gpu_rOn) {
   double rOnSq = gpu_rOn * gpu_rOn;
-  double rCutSq_rijSq = rCutSq - distSq;
+  double rCutSq_rijSq = gpu_rCutSq - distSq;
   double rCutSq_rijSq_Sq = rCutSq_rijSq * rCutSq_rijSq;
 
   double rRat2 = gpu_sigmaSq[index] / distSq;
@@ -772,8 +775,8 @@ __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index,
   double repulse = pow(rRat2, gpu_n[index] * 0.5);
 
-  double factor1 = rCutSq - 3.0 * rOnSq;
-  double factor2 = pow((rCutSq - rOnSq), -3.0);
+  double factor1 = gpu_rCutSq - 3.0 * rOnSq;
+  double factor2 = pow((gpu_rCutSq - rOnSq), -3.0);
 
   double fE = rCutSq_rijSq_Sq * factor2 * (factor1 + 2.0 * distSq);
   const double factE = (distSq > rOnSq ? fE : 1.0);
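Note: every energy path above now rejects pairs with if (gpu_rCutSq < distSq) instead of squaring the cutoff on each call, and the shared cell cutoff becomes fmax(gpu_rCutSq[0], gpu_rCutCoulombSq[box]), which equals the square of the larger cutoff because squaring is monotonic for non-negative values. An illustrative device helper, assuming a name that is not part of GOMC:

// Sketch of the squared-cutoff test the hunks above adopt: no sqrt(),
// no per-pair multiplication, just a compare against a cached square.
__device__ inline bool withinCutoffSq(double distSq, double cutoffSq) {
  return distSq <= cutoffSq;
}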
diff --git a/src/GPU/CalculateEnergyCUDAKernel.cuh b/src/GPU/CalculateEnergyCUDAKernel.cuh
index b06931259..5a2449648 100644
--- a/src/GPU/CalculateEnergyCUDAKernel.cuh
+++ b/src/GPU/CalculateEnergyCUDAKernel.cuh
@@ -32,23 +32,23 @@ BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
             int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
             double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
             int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
-            double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
-            double *gpu_rOn, double *gpu_alpha, int *gpu_ewald,
-            double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x,
-            double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
-            double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul,
-            double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin,
+            double *gpu_rCut, double *gpu_rCutSq, double *gpu_rCutCoulomb,
+            double *gpu_rCutCoulombSq, double *gpu_rCutLow, double *gpu_rOn,
+            double *gpu_alpha, int *gpu_ewald, double *gpu_diElectric_1,
+            int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y,
+            double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+            double *gpu_Invcell_z, bool sc_coul, double sc_sigma_6,
+            double sc_alpha, uint sc_power, double *gpu_rMin,
             double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex,
             double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
             bool *gpu_isFraction, int box);
 
-__device__ double
-CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact,
-               double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind,
-               double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini,
-               double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
-               double sc_sigma_6, double sc_alpha, uint sc_power,
-               double *gpu_sigmaSq, int gpu_count);
+__device__ double CalcCoulombGPU(
+    double distSq, int kind1, int kind2, double qi_qj_fact, double gpu_rCutLow,
+    int gpu_ewald, int gpu_VDW_Kind, double gpu_alpha, double gpu_rCutCoulomb,
+    double gpu_rCutCoulombSq, int gpu_isMartini, double gpu_diElectric_1,
+    double gpu_lambdaCoulomb, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double *gpu_sigmaSq, int gpu_count);
 __device__ double CalcCoulombVirGPU(double distSq, double qi_qj,
                                     double gpu_rCutCoulomb, double gpu_alpha,
                                     int gpu_VDW_Kind, int gpu_ewald,
@@ -56,11 +56,11 @@ __device__ double CalcCoulombVirGPU(double distSq, double qi_qj,
 
 __device__ double CalcEnGPU(double distSq, int kind1, int kind2,
                             double *gpu_sigmaSq, double *gpu_n,
                             double *gpu_epsilon_Cn, int gpu_VDW_Kind,
-                            int gpu_isMartini, double gpu_rCut, double gpu_rOn,
-                            int gpu_count, double gpu_lambdaVDW,
-                            double sc_sigma_6, double sc_alpha, uint sc_power,
-                            double *gpu_rMin, double *gpu_rMaxSq,
-                            double *gpu_expConst);
+                            int gpu_isMartini, double gpu_rCut,
+                            double gpu_rCutSq, double gpu_rOn, int gpu_count,
+                            double gpu_lambdaVDW, double sc_sigma_6,
+                            double sc_alpha, uint sc_power, double *gpu_rMin,
+                            double *gpu_rMaxSq, double *gpu_expConst);
 
 // ElectroStatic Calculation
 //**************************************************************//
@@ -91,25 +91,25 @@ __device__ double CalcCoulombExp6GPU(double distSq, int index,
                                      double *gpu_sigmaSq);
 __device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact,
                                              int gpu_ewald, double gpu_alpha);
-__device__ double
-CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact,
-                            int gpu_ewald, double gpu_alpha, double gpu_rCut,
-                            double gpu_diElectric_1, double gpu_lambdaCoulomb,
-                            bool sc_coul, double sc_sigma_6, double sc_alpha,
-                            uint sc_power, double *gpu_sigmaSq);
-__device__ double
-CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact,
-                                    int gpu_ewald, double gpu_alpha,
-                                    double gpu_rCut, double gpu_diElectric_1);
+__device__ double CalcCoulombSwitchMartiniGPU(
+    double distSq, int index, double qi_qj_fact, int gpu_ewald,
+    double gpu_alpha, double gpu_rCut, double gpu_rCutSq,
+    double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
+    double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_sigmaSq);
+__device__ double CalcCoulombSwitchMartiniGPUNoLambda(
+    double distSq, double qi_qj_fact, int gpu_ewald, double gpu_alpha,
+    double gpu_rCut, double gpu_rCutSq, double gpu_diElectric_1);
 __device__ double CalcCoulombSwitchGPU(double distSq, int index,
                                        double qi_qj_fact, double gpu_alpha,
                                        int gpu_ewald, double gpu_rCut,
+                                       double gpu_rCutSq,
                                        double gpu_lambdaCoulomb, bool sc_coul,
                                        double sc_sigma_6, double sc_alpha,
                                        uint sc_power, double *gpu_sigmaSq);
 __device__ double CalcCoulombSwitchGPUNoLambda(double distSq,
                                                double qi_qj_fact, int gpu_ewald,
                                                double gpu_alpha,
-                                               double gpu_rCut);
+                                               double gpu_rCut,
+                                               double gpu_rCutSq);
 
 // VDW Calculation
 //*****************************************************************//
@@ -123,7 +123,7 @@ __device__ double CalcEnParticleGPUNoLambda(double distSq, int index,
                                             double *gpu_epsilon_Cn);
 __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq,
                                  double *gpu_n, double *gpu_epsilon_Cn,
-                                 double gpu_rCut, double gpu_lambdaVDW,
+                                 double gpu_rCutSq, double gpu_lambdaVDW,
                                  double sc_sigma_6, double sc_alpha,
                                  uint sc_power);
 __device__ double CalcEnShiftGPUNoLambda(double distSq, int index,
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu
index 730477641..49854fa86 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cu
+++ b/src/GPU/CalculateEwaldCUDAKernel.cu
@@ -141,11 +141,11 @@ __global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
   int image = blockIdx.x;
   double sumR = 0.0, sumI = 0.0;
 #pragma unroll 8
-  for (int particleID = threadIdx.x; particleID < atomNumber; particleID += THREADS_PER_BLOCK_SM) {
-    double dot = DotProductGPU(gpu_kx[image], gpu_ky[image],
-                               gpu_kz[image], gpu_x[particleID],
-                               gpu_y[particleID],
-                               gpu_z[particleID]);
+  for (int particleID = threadIdx.x; particleID < atomNumber;
+       particleID += THREADS_PER_BLOCK_SM) {
+    double dot =
+        DotProductGPU(gpu_kx[image], gpu_ky[image], gpu_kz[image],
+                      gpu_x[particleID], gpu_y[particleID], gpu_z[particleID]);
     double dotsin, dotcos;
     sincos(dot, &dotsin, &dotcos);
     sumR += gpu_molCharge[particleID] * dotcos;
@@ -343,8 +343,8 @@ void CallBoxForceReciprocalGPU(
     const std::vector<double> &particleCharge,
     const std::vector<int> &particleMol, const std::vector<int> &particleUsed,
     const std::vector<int> &startMol, const std::vector<int> &lengthMol,
-    double alpha, double alphaSq, double constValue, uint imageSize,
-    XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) {
+    double constValue, uint imageSize, XYZArray const &molCoords,
+    BoxDimensions const &boxAxes, int box) {
   int atomCount = atomForceRec.Count();
   int molCount = molForceRec.Count();
   int *gpu_particleUsed;
@@ -370,8 +370,8 @@ void CallBoxForceReciprocalGPU(
              cudaMemcpyHostToDevice);
   cudaMemcpy(vars->gpu_mForceRecz, molForceRec.z, sizeof(double) * molCount,
              cudaMemcpyHostToDevice);
-  cudaMemcpy(gpu_particleUsed, &particleUsed[0], sizeof(int) * particleUsed.size(),
-             cudaMemcpyHostToDevice);
+  cudaMemcpy(gpu_particleUsed, &particleUsed[0],
+             sizeof(int) * particleUsed.size(), cudaMemcpyHostToDevice);
   cudaMemcpy(vars->gpu_x, molCoords.x, sizeof(double) * atomCount,
              cudaMemcpyHostToDevice);
   cudaMemcpy(vars->gpu_y, molCoords.y, sizeof(double) * atomCount,
@@ -391,15 +391,15 @@ void CallBoxForceReciprocalGPU(
       vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz,
       vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz,
       vars->gpu_particleCharge, vars->gpu_particleMol, gpu_particleUsed,
-      gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue, imageSize,
-      vars->gpu_kxRef[box], vars->gpu_kyRef[box], vars->gpu_kzRef[box],
-      vars->gpu_x, vars->gpu_y, vars->gpu_z, vars->gpu_prefactRef[box],
-      vars->gpu_sumRnew[box], vars->gpu_sumInew[box], vars->gpu_isFraction,
-      vars->gpu_molIndex, vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box],
-      vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
-      vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
-      boxAxes.GetAxis(box).x, boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z,
-      box);
+      gpu_startMol, gpu_lengthMol, vars->gpu_alpha, vars->gpu_alphaSq,
+      constValue, imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box],
+      vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z,
+      vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box],
+      vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb,
+      vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
+      vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
+      vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x,
+      boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box);
 #ifndef NDEBUG
   cudaDeviceSynchronize();
   checkLastErrorCUDA(__FILE__, __LINE__);
@@ -429,18 +429,19 @@
 __global__ void BoxForceReciprocalGPU(
     double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
     double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
-    double *gpu_particleCharge, int *gpu_particleMol, const int *gpu_particleUsed,
-    int *gpu_startMol, int *gpu_lengthMol, double alpha, double alphaSq,
-    double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
-    double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
-    double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
-    bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
-    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-    int *gpu_nonOrth, double axx, double axy, double axz, int box) {
-
-  __shared__ int particleID, moleculeID;
-  __shared__ double x, y, z, lambdaCoef, fixed;
+    double *gpu_particleCharge, int *gpu_particleMol,
+    const int *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol,
+    double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize,
+    double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x,
+    double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew,
+    double *gpu_sumInew, bool *gpu_isFraction, int *gpu_molIndex,
+    double *gpu_lambdaCoulomb, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, double axx, double axy, double axz,
+    int box) {
+
+  __shared__ int particleID, moleculeID;
+  __shared__ double x, y, z, lambdaCoef, fixed;
 
   if (threadIdx.x == 0) {
     // The particleID is the atom that corresponds to this particleUsed entry
@@ -462,8 +463,8 @@ __global__ void BoxForceReciprocalGPU(
     double dot = x * gpu_kx[image] + y * gpu_ky[image] + z * gpu_kz[image];
     double dotsin, dotcos;
     sincos(dot, &dotsin, &dotcos);
-    double factor = fixed * gpu_prefact[image] * (dotsin * gpu_sumRnew[image] -
-                                                  dotcos * gpu_sumInew[image]);
+    double factor = fixed * gpu_prefact[image] *
+                    (dotsin * gpu_sumRnew[image] - dotcos * gpu_sumInew[image]);
     forceX += factor * gpu_kx[image];
     forceY += factor * gpu_ky[image];
     forceZ += factor * gpu_kz[image];
@@ -471,7 +472,7 @@ __global__ void BoxForceReciprocalGPU(
 
   // loop over other particles within the same molecule
   // Pick the thread most likely to exit the for loop early
-  if (threadIdx.x == THREADS_PER_BLOCK-1) {
+  if (threadIdx.x == THREADS_PER_BLOCK - 1) {
     double intraForce = 0.0, distSq = 0.0, dist = 0.0;
     double3 distVect;
     int lastParticleWithinSameMolecule =
@@ -485,11 +486,12 @@ __global__ void BoxForceReciprocalGPU(
                     gpu_Invcell_z);
       dist = sqrt(distSq);
 
-      double expConstValue = exp(-1.0 * alphaSq * distSq);
+      double expConstValue = exp(-1.0 * gpu_alphaSq[box] * distSq);
       double qiqj = gpu_particleCharge[particleID] *
                     gpu_particleCharge[otherParticle] * qqFactGPU;
       intraForce = qiqj * lambdaCoef * lambdaCoef / distSq;
-      intraForce *= (erf(alpha * dist) / dist) - constValue * expConstValue;
+      intraForce *=
+          (erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue;
       forceX -= intraForce * distVect.x;
       forceY -= intraForce * distVect.y;
       forceZ -= intraForce * distVect.z;
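Note: BoxForceReciprocalGPU now reads alpha and its square from per-box device arrays (gpu_alpha[box], gpu_alphaSq[box]) instead of host-passed scalars, which is why CallBoxForceReciprocalGPU dropped its alpha/alphaSq parameters. A sketch of the intramolecular correction factor the kernel computes, under an assumed helper name (intraCorrection is not part of GOMC):

// erf(alpha*r)/r minus the constant-prefactor Gaussian term; callers
// multiply by qi*qj*lambda^2/r^2 as in the kernel above.
__device__ double intraCorrection(double dist, double distSq, double alpha,
                                  double alphaSq, double constValue) {
  return erf(alpha * dist) / dist - constValue * exp(-alphaSq * distSq);
}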
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh
index ff0ff58c0..a7759e009 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cuh
+++ b/src/GPU/CalculateEwaldCUDAKernel.cuh
@@ -20,8 +20,8 @@ void CallBoxForceReciprocalGPU(
     const std::vector<double> &particleCharge,
     const std::vector<int> &particleMol, const std::vector<int> &particleUsed,
     const std::vector<int> &startMol, const std::vector<int> &lengthMol,
-    double alpha, double alphaSq, double constValue, uint imageSize,
-    XYZArray const &molCoords, BoxDimensions const &boxAxes, int box);
+    double constValue, uint imageSize, XYZArray const &molCoords,
+    BoxDimensions const &boxAxes, int box);
 
 void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords,
                                double const *kx, double const *ky,
@@ -57,15 +57,16 @@ void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, uint box,
 __global__ void BoxForceReciprocalGPU(
     double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
     double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
-    double *gpu_particleCharge, int *gpu_particleMol, const int *gpu_particleUsed,
-    int *gpu_startMol, int *gpu_lengthMol, double alpha, double alphaSq,
-    double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
-    double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
-    double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
-    bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
-    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-    int *gpu_nonOrth, double axx, double axy, double axz, int box);
+    double *gpu_particleCharge, int *gpu_particleMol,
+    const int *gpu_particleUsed, int *gpu_startMol, int *gpu_lengthMol,
+    double *gpu_alpha, double *gpu_alphaSq, double constValue, int imageSize,
+    double *gpu_kx, double *gpu_ky, double *gpu_kz, double *gpu_x,
+    double *gpu_y, double *gpu_z, double *gpu_prefact, double *gpu_sumRnew,
+    double *gpu_sumInew, bool *gpu_isFraction, int *gpu_molIndex,
+    double *gpu_lambdaCoulomb, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, double axx, double axy, double axz,
+    int box);
 
 __global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
                                      double *gpu_z, double *gpu_kx,
diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu
index d72acb284..a39e8aaf6 100644
--- a/src/GPU/CalculateForceCUDAKernel.cu
+++ b/src/GPU/CalculateForceCUDAKernel.cu
@@ -8,6 +8,7 @@ along with this program, also can be found at
 #ifdef GOMC_CUDA
 #include
+#include
 
 #include "CUDAMemoryManager.cuh"
 #include "CalculateEnergyCUDAKernel.cuh"
@@ -97,7 +98,8 @@ void CallBoxInterForceGPU(
       vars->gpu_vT12, vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23,
       vars->gpu_vT33, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n,
       vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
-      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
+      vars->gpu_rCutSq, vars->gpu_rCutCoulomb, vars->gpu_rCutCoulombSq,
+      vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq,
       vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box],
       vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
       vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
@@ -238,16 +240,17 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
       vars->gpu_z, axis, halfAx, electrostatic, vars->gpu_particleCharge,
       vars->gpu_particleKind, vars->gpu_particleMol, gpu_REn, gpu_LJEn,
       vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind,
-      vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
-      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
-      vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth,
-      vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
-      vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
-      vars->gpu_Invcell_z[box], vars->gpu_aForcex, vars->gpu_aForcey,
-      vars->gpu_aForcez, vars->gpu_mForcex, vars->gpu_mForcey,
-      vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha, sc_power,
-      vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, vars->gpu_molIndex,
-      vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box);
+      vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut, vars->gpu_rCutSq,
+      vars->gpu_rCutCoulomb, vars->gpu_rCutCoulombSq, vars->gpu_rCutLow,
+      vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq, vars->gpu_ewald,
+      vars->gpu_diElectric_1, vars->gpu_nonOrth, vars->gpu_cell_x[box],
+      vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
+      vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_aForcex,
+      vars->gpu_aForcey, vars->gpu_aForcez, vars->gpu_mForcex,
+      vars->gpu_mForcey, vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha,
+      sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst,
+      vars->gpu_molIndex, vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb,
+      vars->gpu_isFraction, box);
 #ifndef NDEBUG
   cudaDeviceSynchronize();
   checkLastErrorCUDA(__FILE__, __LINE__);
@@ -381,15 +384,16 @@ __global__ void BoxInterForceGPU(
     double *gpu_vT12, double *gpu_vT13, double *gpu_vT22, double *gpu_vT23,
     double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn,
     double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
-    double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
-    double *gpu_rOn, double *gpu_alpha, int *gpu_ewald,
+    double *gpu_rCut, double *gpu_rCutSq, double *gpu_rCutCoulomb,
+    double *gpu_rCutCoulombSq, double *gpu_rCutLow, double *gpu_rOn,
+    double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald,
     double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y,
     double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
     double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6,
     double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq,
     double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW,
     double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box) {
-  __shared__ double shr_cutoff;
+  __shared__ double shr_cutoffSq;
   __shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs;
   __shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex;
   __shared__ bool shr_sameCell;
@@ -446,7 +450,7 @@ __global__ void BoxInterForceGPU(
     // Total number of pairs
     shr_numberOfPairs =
         shr_particlesInsideCurrentCell * particlesInsideNeighboringCell;
-    shr_cutoff = fmax(gpu_rCut[0], gpu_rCutCoulomb[box]);
+    shr_cutoffSq = fmax(gpu_rCutSq[0], gpu_rCutCoulombSq[box]);
   }
   __syncthreads();
@@ -471,9 +475,9 @@ __global__ void BoxInterForceGPU(
     bool skip = mA == mB || (shr_sameCell && mA > mB);
     if (!skip) {
       if (InRcutGPU(distSq, virComponents, gpu_x, gpu_y, gpu_z, currentParticle,
-                    neighborParticle, axis, halfAx, shr_cutoff, gpu_nonOrth[0],
-                    gpu_cell_x, gpu_cell_y, gpu_cell_z, gpu_Invcell_x,
-                    gpu_Invcell_y, gpu_Invcell_z)) {
+                    neighborParticle, axis, halfAx, shr_cutoffSq,
+                    gpu_nonOrth[0], gpu_cell_x, gpu_cell_y, gpu_cell_z,
+                    gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z)) {
         int kA = gpu_particleKind[currentParticle];
         int kB = gpu_particleKind[neighborParticle];
@@ -490,9 +494,9 @@ __global__ void BoxInterForceGPU(
 
         double pVF = CalcEnForceGPU(
             distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn, gpu_rCut[0],
-            gpu_rOn[0], gpu_isMartini[0], gpu_VDW_Kind[0], gpu_count[0],
-            lambdaVDW, sc_sigma_6, sc_alpha, sc_power, gpu_rMin, gpu_rMaxSq,
-            gpu_expConst);
+            gpu_rCutSq[0], gpu_rOn[0], gpu_isMartini[0], gpu_VDW_Kind[0],
+            gpu_count[0], lambdaVDW, sc_sigma_6, sc_alpha, sc_power, gpu_rMin,
+            gpu_rMaxSq, gpu_expConst);
 
         local_vT11 += pVF * (virComponents.x * diff_com.x);
         local_vT22 += pVF * (virComponents.y * diff_com.y);
@@ -518,9 +522,10 @@ __global__ void BoxInterForceGPU(
               mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb);
           double pRF = CalcCoulombForceGPU(
               distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0],
-              gpu_alpha[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0],
-              gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power,
-              lambdaCoulomb, gpu_count[0], kA, kB);
+              gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box],
+              gpu_rCutCoulombSq[box], gpu_diElectric_1[0], gpu_sigmaSq,
+              sc_coul, sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb,
+              gpu_count[0], kA, kB);
 
           local_rT11 += pRF * (virComponents.x * diff_com.x);
           local_rT22 += pRF * (virComponents.y * diff_com.y);
@@ -593,26 +598,26 @@ __global__ void BoxInterForceGPU(
   }
 }
 
-__global__ void
-BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
-            int numberOfCells, int atomNumber, int *gpu_mapParticleToCell,
-            double *gpu_x, double *gpu_y, double *gpu_z, double3 axis,
-            double3 halfAx, bool electrostatic, double *gpu_particleCharge,
-            int *gpu_particleKind, int *gpu_particleMol, double *gpu_REn,
-            double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn,
-            double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini,
-            int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb,
-            double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha,
-            int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth,
-            double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-            double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-            double *gpu_aForcex, double *gpu_aForcey, double *gpu_aForcez,
-            double *gpu_mForcex, double *gpu_mForcey, double *gpu_mForcez,
-            bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
-            double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst,
-            int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
-            bool *gpu_isFraction, int box) {
-  __shared__ double shr_cutoff;
+__global__ void BoxForceGPU(
+    int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
+    int numberOfCells, int atomNumber, int *gpu_mapParticleToCell,
+    double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx,
+    bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind,
+    int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
+    double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
+    int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut,
+    double *gpu_rCutSq, double *gpu_rCutCoulomb, double *gpu_rCutCoulombSq,
+    double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha,
+    double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1,
+    int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, double *gpu_aForcex, double *gpu_aForcey,
+    double *gpu_aForcez, double *gpu_mForcex, double *gpu_mForcey,
+    double *gpu_mForcez, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst,
+    int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
+    bool *gpu_isFraction, int box) {
+  __shared__ double shr_cutoffSq;
   __shared__ int shr_particlesInsideCurrentCell, shr_currentCellStartIndex;
   double REn = 0.0, LJEn = 0.0;
   int currentCell = blockIdx.x;
@@ -622,7 +627,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
     shr_currentCellStartIndex = gpu_cellStartIndex[currentCell];
     shr_particlesInsideCurrentCell =
         gpu_cellStartIndex[currentCell + 1] - shr_currentCellStartIndex;
-    shr_cutoff = fmax(gpu_rCut[0], gpu_rCutCoulomb[box]);
+    shr_cutoffSq = fmax(gpu_rCutSq[0], gpu_rCutCoulombSq[box]);
   }
   __syncthreads();
@@ -651,7 +656,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
       double distSq;
       double3 virComponents;
       if (InRcutGPU(distSq, virComponents, gpu_x, gpu_y, gpu_z, particle,
-                    neighbor, axis, halfAx, shr_cutoff, gpu_nonOrth[0],
+                    neighbor, axis, halfAx, shr_cutoffSq, gpu_nonOrth[0],
                     gpu_cell_x, gpu_cell_y, gpu_cell_z, gpu_Invcell_x,
                     gpu_Invcell_y, gpu_Invcell_z)) {
@@ -662,17 +667,17 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
         int kB = gpu_particleKind[neighbor];
         if (currentCell < neighborCell ||
             (currentCell == neighborCell && particle < neighbor)) {
-          LJEn += CalcEnGPU(distSq, kA, kB, gpu_sigmaSq, gpu_n,
-                            gpu_epsilon_Cn, gpu_VDW_Kind[0],
-                            gpu_isMartini[0], gpu_rCut[0], gpu_rOn[0],
-                            gpu_count[0], lambdaVDW, sc_sigma_6, sc_alpha,
-                            sc_power, gpu_rMin, gpu_rMaxSq, gpu_expConst);
+          LJEn += CalcEnGPU(
+              distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn,
+              gpu_VDW_Kind[0], gpu_isMartini[0], gpu_rCut[0], gpu_rCutSq[0],
+              gpu_rOn[0], gpu_count[0], lambdaVDW, sc_sigma_6, sc_alpha,
+              sc_power, gpu_rMin, gpu_rMaxSq, gpu_expConst);
         }
         double forces = CalcEnForceGPU(
             distSq, kA, kB, gpu_sigmaSq, gpu_n, gpu_epsilon_Cn, gpu_rCut[0],
-            gpu_rOn[0], gpu_isMartini[0], gpu_VDW_Kind[0], gpu_count[0],
-            lambdaVDW, sc_sigma_6, sc_alpha, sc_power, gpu_rMin, gpu_rMaxSq,
-            gpu_expConst);
+            gpu_rCutSq[0], gpu_rOn[0], gpu_isMartini[0], gpu_VDW_Kind[0],
+            gpu_count[0], lambdaVDW, sc_sigma_6, sc_alpha, sc_power,
+            gpu_rMin, gpu_rMaxSq, gpu_expConst);
         double qi_qj_fact = 0.0;
         if (electrostatic) {
           qi_qj_fact =
@@ -687,13 +692,14 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
             REn += CalcCoulombGPU(
                 distSq, kA, kB, qi_qj_fact, gpu_rCutLow[0], gpu_ewald[0],
                 gpu_VDW_Kind[0], gpu_alpha[box], gpu_rCutCoulomb[box],
-                gpu_isMartini[0], gpu_diElectric_1[0], lambdaCoulomb,
-                sc_coul, sc_sigma_6, sc_alpha, sc_power, gpu_sigmaSq,
-                gpu_count[0]);
+                gpu_rCutCoulombSq[box], gpu_isMartini[0],
+                gpu_diElectric_1[0], lambdaCoulomb, sc_coul, sc_sigma_6,
+                sc_alpha, sc_power, gpu_sigmaSq, gpu_count[0]);
           }
           forces += CalcCoulombForceGPU(
               distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0],
-              gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box],
+              gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box],
+              gpu_rCutCoulomb[box], gpu_rCutCoulombSq[box],
              gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6,
               sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
@@ -834,11 +840,11 @@ __global__ void VirialReciprocalGPU(
 __device__ double
 CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq,
                double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut,
-               double gpu_rOn, int gpu_isMartini, int gpu_VDW_Kind,
-               int gpu_count, double gpu_lambdaVDW, double sc_sigma_6,
-               double sc_alpha, uint sc_power, double *gpu_rMin,
-               double *gpu_rMaxSq, double *gpu_expConst) {
-  if ((gpu_rCut * gpu_rCut) < distSq) {
+               double gpu_rCutSq, double gpu_rOn, int gpu_isMartini,
+               int gpu_VDW_Kind, int gpu_count, double gpu_lambdaVDW,
+               double sc_sigma_6, double sc_alpha, uint sc_power,
+               double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst) {
+  if (gpu_rCutSq < distSq) {
     return 0.0;
   }
@@ -861,19 +867,21 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq,
                             gpu_rOn, sc_sigma_6, sc_alpha, sc_power,
                             gpu_lambdaVDW);
   } else
     return CalcVirSwitchGPU(distSq, index, gpu_sigmaSq[index], gpu_epsilon_Cn,
-                            gpu_n, gpu_rCut, gpu_rOn);
+                            gpu_n, gpu_rCutSq, gpu_rOn);
 }
 
 // ElectroStatic Calculation
 //**************************************************************//
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
                                             int gpu_ewald, double gpu_alpha,
-                                            int index, double gpu_sigmaSq,
-                                            bool sc_coul, double sc_sigma_6,
-                                            double sc_alpha, uint sc_power,
+                                            double gpu_alphaSq, int index,
+                                            double gpu_sigmaSq, bool sc_coul,
+                                            double sc_sigma_6, double sc_alpha,
+                                            uint sc_power,
                                             double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                     gpu_alphaSq);
   }
 
   if (sc_coul) {
@@ -886,22 +894,25 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                     gpu_alphaSq);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return gpu_lambdaCoulomb * CalcCoulombVirParticleGPU(distSq, qi_qj,
+                                                         gpu_ewald, gpu_alpha,
+                                                         gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirParticleGPU(const double distSq,
                                             const double qi_qj,
                                             const int gpu_ewald,
-                                            const double gpu_alpha) {
+                                            const double gpu_alpha,
+                                            const double gpu_alphaSq) {
   const double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double alphaValue = gpu_alpha * M_2_SQRTPI;
-    double expValue = exp(-gpu_alpha * gpu_alpha * distSq);
+    double expValue = exp(-gpu_alphaSq * distSq);
    double erfcValue = erfc(gpu_alpha * dist) / dist;
     return qi_qj * (alphaValue * expValue + erfcValue) / distSq;
   } else {
@@ -911,12 +922,14 @@ __device__ double CalcCoulombVirParticleGPU(const double distSq,
 
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
                                          int gpu_ewald, double gpu_alpha,
-                                         int index, double gpu_sigmaSq,
-                                         bool sc_coul, double sc_sigma_6,
-                                         double sc_alpha, uint sc_power,
+                                         double gpu_alphaSq, int index,
+                                         double gpu_sigmaSq, bool sc_coul,
+                                         double sc_sigma_6, double sc_alpha,
+                                         uint sc_power,
                                          double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                  gpu_alphaSq);
   }
 
   if (sc_coul) {
@@ -929,20 +942,22 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                  gpu_alphaSq);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return gpu_lambdaCoulomb * CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald,
+                                                      gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-                                         int gpu_ewald, double gpu_alpha) {
+                                         int gpu_ewald, double gpu_alpha,
+                                         double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-gpu_alphaSq * distSq);
     double temp = erfc(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -952,12 +967,14 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
 
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
                                         int gpu_ewald, double gpu_alpha,
-                                        int index, double gpu_sigmaSq,
-                                        bool sc_coul, double sc_sigma_6,
-                                        double sc_alpha, uint sc_power,
+                                        double gpu_alphaSq, int index,
+                                        double gpu_sigmaSq, bool sc_coul,
+                                        double sc_sigma_6, double sc_alpha,
+                                        uint sc_power,
                                         double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                 gpu_alphaSq);
   }
   if (sc_coul) {
     double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq;
@@ -969,20 +986,22 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                 gpu_alphaSq);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return gpu_lambdaCoulomb * CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald,
+                                                     gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha) {
+                                        int gpu_ewald, double gpu_alpha,
+                                        double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-gpu_alphaSq * distSq);
     double temp = erfc(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -992,12 +1011,13 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
 
 __device__ double CalcCoulombVirSwitchMartiniGPU(
     double distSq, double qi_qj, int gpu_ewald, double gpu_alpha,
-    double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq,
-    bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
-    double gpu_lambdaCoulomb) {
+    double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index,
+    double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
     return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
-                                          gpu_rCut, gpu_diElectric_1);
+                                          gpu_alphaSq, gpu_rCut,
+                                          gpu_diElectric_1);
   }
 
   if (sc_coul) {
@@ -1011,24 +1031,24 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
            CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
-                                          gpu_rCut, gpu_diElectric_1);
+                                          gpu_alphaSq, gpu_rCut,
+                                          gpu_diElectric_1);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
-                                          gpu_rCut, gpu_diElectric_1);
+    return gpu_lambdaCoulomb * CalcCoulombVirSwitchMartiniGPU(
+                                   distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                   gpu_alphaSq, gpu_rCut, gpu_diElectric_1);
   }
 }
 
-__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
-                                                 int gpu_ewald,
-                                                 double gpu_alpha,
-                                                 double gpu_rCut,
-                                                 double gpu_diElectric_1) {
+__device__ double
+CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald,
+                               double gpu_alpha, double gpu_alphaSq,
+                               double gpu_rCut, double gpu_diElectric_1) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-gpu_alphaSq * distSq);
     double temp = erfc(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -1055,14 +1075,14 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
 
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                           int gpu_ewald, double gpu_alpha,
-                                          double gpu_rCut, int index,
-                                          double gpu_sigmaSq, bool sc_coul,
-                                          double sc_sigma_6, double sc_alpha,
-                                          uint sc_power,
+                                          double gpu_alphaSq, double gpu_rCutSq,
+                                          int index, double gpu_sigmaSq,
+                                          bool sc_coul, double sc_sigma_6,
+                                          double sc_alpha, uint sc_power,
                                           double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
     return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
-                                   gpu_rCut);
+                                   gpu_alphaSq, gpu_rCutSq);
   }
 
   if (sc_coul) {
@@ -1076,29 +1096,31 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
            CalcCoulombVirSwitchGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
-                                   gpu_rCut);
+                                   gpu_alphaSq, gpu_rCutSq);
   } else {
     return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald,
-                                                       gpu_alpha, gpu_rCut);
+                                                       gpu_alpha, gpu_alphaSq,
+                                                       gpu_rCutSq);
   }
 }
 
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                           int gpu_ewald, double gpu_alpha,
-                                          double gpu_rCut) {
+                                          double gpu_alphaSq,
+                                          double gpu_rCutSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-gpu_alphaSq * distSq);
     double temp = erfc(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
-    double rCutSq = gpu_rCut * gpu_rCut;
-    double switchVal = distSq / rCutSq - 1.0;
+    double switchVal = distSq / gpu_rCutSq - 1.0;
     switchVal *= switchVal;
-    double dSwitchVal = 2.0 * (distSq / rCutSq - 1.0) * 2.0 * dist / rCutSq;
+    double dSwitchVal =
+        2.0 * (distSq / gpu_rCutSq - 1.0) * 2.0 * dist / gpu_rCutSq;
     return -1.0 * qi_qj * (dSwitchVal / distSq - switchVal / (distSq * dist));
   }
 }
@@ -1275,12 +1297,12 @@ __device__ double CalcVirSwitchMartiniGPU(double distSq, int index,
 
 __device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
                                    double *gpu_epsilon_Cn, double *gpu_n,
-                                   double gpu_rCut, double gpu_rOn,
+                                   double gpu_rCutSq, double gpu_rOn,
                                    double sc_sigma_6, double sc_alpha,
                                    uint sc_power, double gpu_lambdaVDW) {
   if (gpu_lambdaVDW >= 0.999999) {
     return CalcVirSwitchGPU(distSq, index, gpu_sigmaSq, gpu_epsilon_Cn, gpu_n,
-                            gpu_rCut, gpu_rOn);
+                            gpu_rCutSq, gpu_rOn);
   }
 
   double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq;
@@ -1292,14 +1314,13 @@ __device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
   double correction = distSq / softRsq;
   return gpu_lambdaVDW * correction * correction *
          CalcVirSwitchGPU(softRsq, index, gpu_sigmaSq, gpu_epsilon_Cn, gpu_n,
-                          gpu_rCut, gpu_rOn);
+                          gpu_rCutSq, gpu_rOn);
 }
 
 __device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
                                    double *gpu_epsilon_Cn, double *gpu_n,
-                                   double gpu_rCut, double gpu_rOn) {
-  double rCutSq = gpu_rCut * gpu_rCut;
-  double rCutSq_rijSq = rCutSq - distSq;
+                                   double gpu_rCutSq, double gpu_rOn) {
+  double rCutSq_rijSq = gpu_rCutSq - distSq;
   double rCutSq_rijSq_Sq = rCutSq_rijSq * rCutSq_rijSq;
   double rOnSq = gpu_rOn * gpu_rOn;
@@ -1308,9 +1329,9 @@ __device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
   double rRat4 = rRat2 * rRat2;
   double attract = rRat4 * rRat2;
   double repulse = pow(rRat2, gpu_n[index] * 0.5);
-  double factor1 = rCutSq - 3.0 * rOnSq;
-  double factor2 =
-      1.0 / ((rCutSq - rOnSq) * (rCutSq - rOnSq) * (rCutSq - rOnSq));
+  double factor1 = gpu_rCutSq - 3.0 * rOnSq;
+  double factor2 = 1.0 / ((gpu_rCutSq - rOnSq) * (gpu_rCutSq - rOnSq) *
+                          (gpu_rCutSq - rOnSq));
   double fE = rCutSq_rijSq_Sq * factor2 * (factor1 + 2.0 * distSq);
   double fW = 12.0 * factor2 * rCutSq_rijSq * (rOnSq - distSq);
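Note: the virial kernels above all share one real-space Ewald expression, now written with the precomputed alphaSq: qi_qj * (erfc(alpha*r)/r + (2*alpha/sqrt(pi)) * exp(-alphaSq*r^2)) / r^2. A self-contained sketch of that recurring term (ewaldVirTerm is a hypothetical name):

// M_2_SQRTPI is 2/sqrt(pi), as noted in the kernels above.
__device__ double ewaldVirTerm(double distSq, double qi_qj, double alpha,
                               double alphaSq) {
  double dist = sqrt(distSq);
  double constValue = alpha * M_2_SQRTPI;
  double expConstValue = exp(-alphaSq * distSq);
  return qi_qj * (erfc(alpha * dist) / dist + constValue * expConstValue) /
         distSq;
}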
*gpu_x, double *gpu_y, double *gpu_z, double3 axis, - double3 halfAx, bool electrostatic, double *gpu_particleCharge, - int *gpu_particleKind, int *gpu_particleMol, double *gpu_REn, - double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn, - double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, - int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb, - double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, - int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth, - double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, - double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, - double *gpu_aForcex, double *gpu_aForcey, double *gpu_aForcez, - double *gpu_mForcex, double *gpu_mForcey, double *gpu_mForcez, - bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power, - double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst, - int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, - bool *gpu_isFraction, int box); +__global__ void BoxForceGPU( + int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, + int numberOfCells, int atomNumber, int *gpu_mapParticleToCell, + double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx, + bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind, + int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn, + double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, + int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut, + double *gpu_rCutSq, double *gpu_rCutCoulomb, double *gpu_rCutCoulombSq, + double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, + double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, + int *gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y, + double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, + double *gpu_Invcell_z, double *gpu_aForcex, double *gpu_aForcey, + double *gpu_aForcez, double *gpu_mForcex, double *gpu_mForcey, + double *gpu_mForcez, bool sc_coul, double sc_sigma_6, double sc_alpha, + uint sc_power, double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst, + int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb, + bool *gpu_isFraction, int box); __global__ void BoxInterForceGPU( int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList, @@ -77,8 +77,9 @@ __global__ void BoxInterForceGPU( double *gpu_vT12, double *gpu_vT13, double *gpu_vT22, double *gpu_vT23, double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, - double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow, - double *gpu_rOn, double *gpu_alpha, int *gpu_ewald, + double *gpu_rCut, double *gpu_rCutSq, double *gpu_rCutCoulomb, + double *gpu_rCutCoulombSq, double *gpu_rCutLow, double *gpu_rOn, + double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald, double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6, @@ -98,57 +99,61 @@ __global__ void VirialReciprocalGPU( __device__ double CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq, double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut, - double gpu_rOn, int gpu_isMartini, int gpu_VDW_Kind, - int gpu_count, double gpu_lambdaVDW, double sc_sigma_6, - double sc_alpha, uint sc_power, double *gpu_rMin, - double *gpu_rMaxSq, double *gpu_expConst); + double gpu_rCutSq, double gpu_rOn, int gpu_isMartini, + 
 
 // ElectroStatic Calculation
 //**************************************************************//
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
                                             int gpu_ewald, double gpu_alpha,
-                                            int index, double gpu_sigmaSq,
-                                            bool sc_coul, double sc_sigma_6,
-                                            double sc_alpha, uint sc_power,
+                                            double gpu_alphaSq, int index,
+                                            double gpu_sigmaSq, bool sc_coul,
+                                            double sc_sigma_6, double sc_alpha,
+                                            uint sc_power,
                                             double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-                                            int gpu_ewald, double gpu_alpha);
+                                            int gpu_ewald, double gpu_alpha,
+                                            double gpu_alphaSq);
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
                                          int gpu_ewald, double gpu_alpha,
-                                         int index, double gpu_sigmaSq,
-                                         bool sc_coul, double sc_sigma_6,
-                                         double sc_alpha, uint sc_power,
+                                         double gpu_alphaSq, int index,
+                                         double gpu_sigmaSq, bool sc_coul,
+                                         double sc_sigma_6, double sc_alpha,
+                                         uint sc_power,
                                          double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-                                         int gpu_ewald, double gpu_alpha);
+                                         int gpu_ewald, double gpu_alpha,
+                                         double gpu_alphaSq);
+__device__ double
+CalcCoulombVirExp6GPU(double distSq, double qi_qj, int gpu_ewald,
+                      double gpu_alpha, double gpu_alphaSq, int index,
+                      double gpu_sigmaSq, bool sc_coul, double sc_sigma_6,
+                      double sc_alpha, uint sc_power, double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
                                         int gpu_ewald, double gpu_alpha,
-                                        int index, double gpu_sigmaSq,
-                                        bool sc_coul, double sc_sigma_6,
-                                        double sc_alpha, uint sc_power,
-                                        double gpu_lambdaCoulomb);
-__device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha);
+                                        double gpu_alphaSq);
 __device__ double CalcCoulombVirSwitchMartiniGPU(
     double distSq, double qi_qj, int gpu_ewald, double gpu_alpha,
-    double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq,
-    bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
-    double gpu_lambdaCoulomb);
-__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
-                                                 int gpu_ewald,
-                                                 double gpu_alpha,
-                                                 double gpu_rCut,
-                                                 double gpu_diElectric_1);
+    double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index,
+    double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double gpu_lambdaCoulomb);
+__device__ double
+CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald,
+                               double gpu_alpha, double gpu_alphaSq,
+                               double gpu_rCut, double gpu_diElectric_1);
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                           int gpu_ewald, double gpu_alpha,
-                                          double gpu_rCut, int index,
-                                          double gpu_sigmaSq, bool sc_coul,
-                                          double sc_sigma_6, double sc_alpha,
-                                          uint sc_power,
+                                          double gpu_alphaSq, double gpu_rCutSq,
+                                          int index, double gpu_sigmaSq,
+                                          bool sc_coul, double sc_sigma_6,
+                                          double sc_alpha, uint sc_power,
                                           double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                           int gpu_ewald, double gpu_alpha,
-                                          double gpu_rCut);
+                                          double gpu_alphaSq,
+                                          double gpu_rCutSq);
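
The Coulomb virial overloads above take gpu_alphaSq alongside gpu_alpha
because the Ewald real-space term needs alpha inside erfc() and alpha squared
inside the Gaussian damping; shipping the square avoids one multiply per
pair. A simplified, illustrative form of that pair term (close to, but not
exactly, the kernel code):

    __device__ double ewaldRealVir(double distSq, double qi_qj,
                                   double alpha, double alphaSq) {
      double dist = sqrt(distSq);
      double constValue = 2.0 * alpha / sqrt(M_PI);
      double expConstValue = exp(-1.0 * alphaSq * distSq); // alpha^2 used here
      double temp = erfc(alpha * dist) / dist;
      return qi_qj * (temp + constValue * expConstValue) / distSq;
    }
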
 
 // VDW Calculation
 //*****************************************************************//
@@ -185,12 +190,12 @@ __device__ double CalcVirSwitchMartiniGPU(double distSq,
                                           int index, double gpu_rCut,
                                           double rOn);
 __device__ double
 CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
                  double *gpu_epsilon_Cn, double *gpu_n,
-                 double gpu_rCut, double gpu_rOn,
+                 double gpu_rCutSq, double gpu_rOn,
                  double sc_sigma_6, double sc_alpha, uint sc_power,
                  double gpu_lambdaVDW);
 __device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
                                    double *gpu_epsilon_Cn, double *gpu_n,
-                                   double gpu_rCut, double gpu_rOn);
+                                   double gpu_rCutSq, double gpu_rOn);
 
 // Have to move the implementation for some functions here
 // since CUDA doesn't allow __global__ to call __device__
@@ -199,37 +204,41 @@ __device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
 __device__ inline double
 CalcCoulombForceGPU(double distSq, double qi_qj, int gpu_VDW_Kind,
                     int gpu_ewald, int gpu_isMartini, double gpu_alpha,
-                    double gpu_rCutCoulomb, double gpu_diElectric_1,
+                    double gpu_alphaSq, double gpu_rCutCoulomb,
+                    double gpu_rCutCoulombSq, double gpu_diElectric_1,
                     double *gpu_sigmaSq, bool sc_coul, double sc_sigma_6,
                     double sc_alpha, uint sc_power, double gpu_lambdaCoulomb,
                     int gpu_count, int kind1, int kind2) {
-  if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) {
+  if (gpu_rCutCoulombSq < distSq) {
     return 0.0;
   }
 
   int index = FlatIndexGPU(kind1, kind2, gpu_count);
   if (gpu_VDW_Kind == GPU_VDW_STD_KIND) {
-    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index,
-                                     gpu_sigmaSq[index], sc_coul, sc_sigma_6,
-                                     sc_alpha, sc_power, gpu_lambdaCoulomb);
+    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                     gpu_alphaSq, index, gpu_sigmaSq[index],
+                                     sc_coul, sc_sigma_6, sc_alpha, sc_power,
+                                     gpu_lambdaCoulomb);
   } else if (gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) {
-    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index,
-                                  gpu_sigmaSq[index], sc_coul, sc_sigma_6,
-                                  sc_alpha, sc_power, gpu_lambdaCoulomb);
+    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                  gpu_alphaSq, index, gpu_sigmaSq[index],
+                                  sc_coul, sc_sigma_6, sc_alpha, sc_power,
+                                  gpu_lambdaCoulomb);
   } else if (gpu_VDW_Kind == GPU_VDW_EXP6_KIND) {
-    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index,
-                                 gpu_sigmaSq[index], sc_coul, sc_sigma_6,
-                                 sc_alpha, sc_power, gpu_lambdaCoulomb);
+    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                 gpu_alphaSq, index, gpu_sigmaSq[index],
+                                 sc_coul, sc_sigma_6, sc_alpha, sc_power,
+                                 gpu_lambdaCoulomb);
   } else if (gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) {
     return CalcCoulombVirSwitchMartiniGPU(
-        distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_rCutCoulomb, gpu_diElectric_1,
-        index, gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha, sc_power,
-        gpu_lambdaCoulomb);
+        distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb,
+        gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6,
+        sc_alpha, sc_power, gpu_lambdaCoulomb);
   } else
     return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
-                                   gpu_rCutCoulomb, index, gpu_sigmaSq[index],
-                                   sc_coul, sc_sigma_6, sc_alpha, sc_power,
-                                   gpu_lambdaCoulomb);
+                                   gpu_alphaSq, gpu_rCutCoulombSq, index,
+                                   gpu_sigmaSq[index], sc_coul, sc_sigma_6,
+                                   sc_alpha, sc_power, gpu_lambdaCoulomb);
 }
 
 #endif /*GOMC_CUDA*/
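
CalcCoulombForceGPU resolves a particle-kind pair to one slot of the flat
count*count parameter tables (gpu_sigmaSq and friends) via FlatIndexGPU,
whose definition appears as context in the next file. An illustrative
restatement of that idiom, with hypothetical names:

    // Row-major lookup into a count*count mixing table; (i, j) and (j, i)
    // both land on valid entries because the tables are filled symmetrically.
    __device__ inline int flatIndex(int i, int j, int count) {
      return i + j * count;
    }
    // e.g. double sigSq = gpu_sigmaSq[flatIndex(kindA, kindB, count)];
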
diff --git a/src/GPU/CalculateMinImageCUDAKernel.cuh b/src/GPU/CalculateMinImageCUDAKernel.cuh
index f502cdbf6..e79431359 100644
--- a/src/GPU/CalculateMinImageCUDAKernel.cuh
+++ b/src/GPU/CalculateMinImageCUDAKernel.cuh
@@ -166,7 +166,7 @@ DeviceInRcut(double &distSq, double3 &dist, const double *gpu_x,
 __device__ inline bool
 InRcutGPU(double &distSq, const double *x, const double *y, const double *z,
           uint i, uint j, const double3 &axis, const double3 &halfAx,
-          double gpu_rCut, int gpu_nonOrth, const double *gpu_cell_x,
+          double gpu_rCutSq, int gpu_nonOrth, const double *gpu_cell_x,
           const double *gpu_cell_y, const double *gpu_cell_z,
           const double *gpu_Invcell_x, const double *gpu_Invcell_y,
           const double *gpu_Invcell_z) {
@@ -183,14 +183,14 @@ InRcutGPU(double &distSq, const double *x, const double *y, const double *z,
 
   distSq = dist.x * dist.x + dist.y * dist.y + dist.z * dist.z;
 
-  return ((gpu_rCut * gpu_rCut) > distSq);
+  return (gpu_rCutSq > distSq);
 }
 
 // Call by force calculate to return the distance and virial component
 __device__ inline bool
 InRcutGPU(double &distSq, double3 &dist, const double *x, const double *y,
           const double *z, uint i, uint j, const double3 &axis,
-          const double3 &halfAx, double gpu_rCut, int gpu_nonOrth,
+          const double3 &halfAx, double gpu_rCutSq, int gpu_nonOrth,
           const double *gpu_cell_x, const double *gpu_cell_y,
           const double *gpu_cell_z, const double *gpu_Invcell_x,
           const double *gpu_Invcell_y, const double *gpu_Invcell_z) {
@@ -205,7 +205,7 @@ InRcutGPU(double &distSq, double3 &dist, const double *x, const double *y,
 
   distSq = dist.x * dist.x + dist.y * dist.y + dist.z * dist.z;
 
-  return ((gpu_rCut * gpu_rCut) > distSq);
+  return (gpu_rCutSq > distSq);
 }
 
 __device__ inline int FlatIndexGPU(int i, int j, int gpu_count) {
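
Dropping the per-pair multiply is behavior-preserving: squaring is monotone
for the non-negative distances and cutoffs involved, so
(gpu_rCut * gpu_rCut) > distSq and gpu_rCutSq > distSq always agree. A
self-contained host-side check of that invariant (illustrative only):

    #include <cassert>

    int main() {
      const double rCut = 10.0;
      const double rCutSq = rCut * rCut;
      // For non-negative r: (r < rCut) <=> (r * r < rCutSq).
      for (double r = 0.0; r < 2.0 * rCut; r += 0.125)
        assert((r < rCut) == (r * r < rCutSq));
      return 0;
    }
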
diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cu b/src/GPU/ConstantDefinitionsCUDAKernel.cu
index 37a9fd22e..5dadf8356 100644
--- a/src/GPU/ConstantDefinitionsCUDAKernel.cu
+++ b/src/GPU/ConstantDefinitionsCUDAKernel.cu
@@ -32,9 +32,10 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW,
 
 void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
                        double const *epsilon_Cn, double const *n, int VDW_Kind,
-                       int isMartini, int count, double Rcut,
-                       double const *rCutCoulomb, double RcutLow, double Ron,
-                       double const *alpha, int ewald, double diElectric_1) {
+                       int isMartini, int count, double Rcut, double RcutSq,
+                       double const *rCutCoulomb, double const *rCutCoulombSq,
+                       double RcutLow, double Ron, double const *alpha,
+                       double const *alphaSq, int ewald, double diElectric_1) {
   int countSq = count * count;
   CUMALLOC((void **)&vars.gpu_sigmaSq, countSq * sizeof(double));
   CUMALLOC((void **)&vars.gpu_epsilon_Cn, countSq * sizeof(double));
@@ -43,10 +44,13 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
   CUMALLOC((void **)&vars.gpu_isMartini, sizeof(int));
   CUMALLOC((void **)&vars.gpu_count, sizeof(int));
   CUMALLOC((void **)&vars.gpu_rCut, sizeof(double));
+  CUMALLOC((void **)&vars.gpu_rCutSq, sizeof(double));
   CUMALLOC((void **)&vars.gpu_rCutCoulomb, BOX_TOTAL * sizeof(double));
+  CUMALLOC((void **)&vars.gpu_rCutCoulombSq, BOX_TOTAL * sizeof(double));
   CUMALLOC((void **)&vars.gpu_rCutLow, sizeof(double));
   CUMALLOC((void **)&vars.gpu_rOn, sizeof(double));
   CUMALLOC((void **)&vars.gpu_alpha, BOX_TOTAL * sizeof(double));
+  CUMALLOC((void **)&vars.gpu_alphaSq, BOX_TOTAL * sizeof(double));
   CUMALLOC((void **)&vars.gpu_ewald, sizeof(int));
   CUMALLOC((void **)&vars.gpu_diElectric_1, sizeof(double));
   CUMALLOC((void **)&vars.gpu_finalVal, sizeof(double));
@@ -67,13 +71,18 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
                        cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_count, &count, sizeof(int), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rCut, &Rcut, sizeof(double), cudaMemcpyHostToDevice);
+  cudaMemcpy(vars.gpu_rCutSq, &RcutSq, sizeof(double),
+             cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rCutCoulomb, rCutCoulomb, BOX_TOTAL * sizeof(double),
              cudaMemcpyHostToDevice);
+  cudaMemcpy(vars.gpu_rCutCoulombSq, rCutCoulombSq, BOX_TOTAL * sizeof(double),
+             cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rCutLow, &RcutLow, sizeof(double),
              cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rOn, &Ron, sizeof(double), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_alpha, alpha, BOX_TOTAL * sizeof(double),
              cudaMemcpyHostToDevice);
+  cudaMemcpy(vars.gpu_alphaSq, alphaSq, BOX_TOTAL * sizeof(double),
+             cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_ewald, &ewald, sizeof(int), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_diElectric_1, &diElectric_1, sizeof(double),
              cudaMemcpyHostToDevice);
diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
index 0e2b92559..094674eef 100644
--- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh
+++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
@@ -25,9 +25,10 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW,
                      double *lambdaCoulomb, bool *isFraction);
 void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
                        double const *epsilon_Cn, double const *n, int VDW_Kind,
-                       int isMartini, int count, double Rcut,
-                       double const *rCutCoulomb, double RcutLow, double Ron,
-                       double const *alpha, int ewald, double diElectric_1);
+                       int isMartini, int count, double Rcut, double RcutSq,
+                       double const *rCutCoulomb, double const *rCutCoulombSq,
+                       double RcutLow, double Ron, double const *alpha,
+                       double const *alphaSq, int ewald, double diElectric_1);
 void InitCoordinatesCUDA(VariablesCUDA *vars, uint maxAtomNumber,
                          uint maxAtomsInMol, uint maxMolNumber);
 void InitEwaldVariablesCUDA(VariablesCUDA *vars, uint imageTotal);
diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh
index 9b43a7b74..5df635153 100644
--- a/src/GPU/VariablesCUDA.cuh
+++ b/src/GPU/VariablesCUDA.cuh
@@ -75,10 +75,13 @@ public:
     gpu_count = nullptr;
     gpu_startAtomIdx = nullptr;
     gpu_rCut = nullptr;
+    gpu_rCutSq = nullptr;
     gpu_rCutCoulomb = nullptr;
+    gpu_rCutCoulombSq = nullptr;
     gpu_rCutLow = nullptr;
     gpu_rOn = nullptr;
     gpu_alpha = nullptr;
+    gpu_alphaSq = nullptr;
     gpu_ewald = nullptr;
     gpu_diElectric_1 = nullptr;
     gpu_finalVal = nullptr;
@@ -129,11 +132,11 @@ public:
   int *gpu_isMartini;
   int *gpu_count;
   int *gpu_startAtomIdx; // start atom index of the molecule
-  double *gpu_rCut;
-  double *gpu_rCutCoulomb;
+  double *gpu_rCut, *gpu_rCutSq;
+  double *gpu_rCutCoulomb, *gpu_rCutCoulombSq;
   double *gpu_rCutLow;
   double *gpu_rOn;
-  double *gpu_alpha;
+  double *gpu_alpha, *gpu_alphaSq;
   int *gpu_ewald;
   double *gpu_diElectric_1;
   double *gpu_finalVal;
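
One bookkeeping note on the three new members: each buffer CUMALLOC'd in
InitGPUForceField presumably needs a matching release wherever gpu_rCut,
gpu_rCutCoulomb, and gpu_alpha are freed today. A sketch of that cleanup,
assuming CUFREE is CUMALLOC's counterpart; the function name below is
hypothetical:

    // Hypothetical teardown mirroring the allocations added above.
    void FreeSquaredCutoffBuffers(VariablesCUDA &vars) {
      CUFREE(vars.gpu_rCutSq);        // single value
      CUFREE(vars.gpu_rCutCoulombSq); // BOX_TOTAL values
      CUFREE(vars.gpu_alphaSq);       // BOX_TOTAL values
    }
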