Skip to content

Commit

Permalink
Remove particleNoCharge array from BoxForceReciprocalGPU and format r…
Browse files Browse the repository at this point in the history
…ecently changed files via clang-format
  • Loading branch information
LSchwiebert committed Jul 28, 2024
1 parent 7151453 commit 0a19327
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 72 deletions.
14 changes: 6 additions & 8 deletions src/Ewald.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1578,9 +1578,7 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords,
double constValue = ff.alpha[box] * M_2_SQRTPI;

#ifdef GOMC_CUDA
bool *particleUsed;
particleUsed = new bool[atomForceRec.Count()];
memset((void *)particleUsed, false, atomForceRec.Count() * sizeof(bool));
bool *particleUsed = new bool[atomForceRec.Count()];
#if ENSEMBLE == GEMC || ENSEMBLE == GCMC
memset((void *)particleUsed, false, atomForceRec.Count() * sizeof(bool));
MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(box);
Expand All @@ -1604,11 +1602,11 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords,
memset((void *)particleUsed, true, atomForceRec.Count() * sizeof(bool));
#endif

CallBoxForceReciprocalGPU(
ff.particles->getCUDAVars(), atomForceRec, molForceRec, particleCharge,
particleMol, particleHasNoCharge, particleUsed, startMol, lengthMol,
ff.alpha[box], ff.alphaSq[box], constValue, imageSizeRef[box],
molCoords, currentAxes, box);
CallBoxForceReciprocalGPU(ff.particles->getCUDAVars(), atomForceRec,
molForceRec, particleCharge, particleMol,
particleUsed, startMol, lengthMol, ff.alpha[box],
ff.alphaSq[box], constValue, imageSizeRef[box],
molCoords, currentAxes, box);
delete[] particleUsed;
#else
// molecule iterator
Expand Down
23 changes: 13 additions & 10 deletions src/GPU/CalculateEnergyCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
BoxInterGPU<<<blocksPerGrid, threadsPerBlock>>>(
gpu_cellStartIndex, vars->gpu_cellVector, gpu_neighborList, numberOfCells,
vars->gpu_x, vars->gpu_y, vars->gpu_z, axis, halfAx, electrostatic,
vars->gpu_particleCharge, gpu_particleKind, gpu_particleMol, vars->gpu_REn,
vars->gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n,
vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth,
vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
vars->gpu_particleCharge, gpu_particleKind, gpu_particleMol,
vars->gpu_REn, vars->gpu_LJEn, vars->gpu_sigmaSq, vars->gpu_epsilon_Cn,
vars->gpu_n, vars->gpu_VDW_Kind, vars->gpu_isMartini, vars->gpu_count,
vars->gpu_rCut, vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn,
vars->gpu_alpha, vars->gpu_ewald, vars->gpu_diElectric_1,
vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box],
vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
vars->gpu_Invcell_z[box], sc_coul, sc_sigma_6, sc_alpha, sc_power,
vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst, vars->gpu_molIndex,
vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box);
Expand All @@ -104,14 +104,17 @@ void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
#endif

// ReduceSum
DeviceReduce::Sum(vars->cub_energyVec_storage, vars->cub_energyVec_storage_size, vars->gpu_LJEn,
DeviceReduce::Sum(vars->cub_energyVec_storage,
vars->cub_energyVec_storage_size, vars->gpu_LJEn,
vars->gpu_finalVal, energyVectorLen);
cudaMemcpy(&LJEn, vars->gpu_finalVal, sizeof(double), cudaMemcpyDeviceToHost);
if (electrostatic) {
// Real Term ReduceSum
DeviceReduce::Sum(vars->cub_energyVec_storage, vars->cub_energyVec_storage_size, vars->gpu_REn,
DeviceReduce::Sum(vars->cub_energyVec_storage,
vars->cub_energyVec_storage_size, vars->gpu_REn,
vars->gpu_finalVal, energyVectorLen);
cudaMemcpy(&REn, vars->gpu_finalVal, sizeof(double), cudaMemcpyDeviceToHost);
cudaMemcpy(&REn, vars->gpu_finalVal, sizeof(double),
cudaMemcpyDeviceToHost);
} else {
REn = 0.0;
}
Expand Down
61 changes: 23 additions & 38 deletions src/GPU/CalculateEwaldCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -356,33 +356,22 @@ void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, uint box,
void CallBoxForceReciprocalGPU(
VariablesCUDA *vars, XYZArray &atomForceRec, XYZArray &molForceRec,
const std::vector<double> &particleCharge,
const std::vector<int> &particleMol,
const std::vector<bool> &particleHasNoCharge, const bool *particleUsed,
const std::vector<int> &particleMol, const bool *particleUsed,
const std::vector<int> &startMol, const std::vector<int> &lengthMol,
double alpha, double alphaSq, double constValue, uint imageSize,
XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) {
int atomCount = atomForceRec.Count();
int molCount = molForceRec.Count();
int *gpu_particleMol;
bool *gpu_particleHasNoCharge, *gpu_particleUsed;
bool *arr_particleHasNoCharge = new bool[particleHasNoCharge.size()];
bool *gpu_particleUsed;
int *gpu_startMol, *gpu_lengthMol;

// particleHasNoCharge is stored in vector<bool>, so in order to copy it to
// GPU it needs to be stored in bool[]. because: std::vector<bool> : Does not
// necessarily store its elements as a contiguous array
for (int i = 0; i < particleHasNoCharge.size(); i++) {
arr_particleHasNoCharge[i] = particleHasNoCharge[i];
}

// calculate block and grid sizes
dim3 threadsPerBlock(THREADS_PER_BLOCK, 1, 1);
int blocksPerGridX = (int)(atomCount / threadsPerBlock.x) + 1;
int blocksPerGridY = (int)(imageSize / IMAGES_PER_BLOCK) + 1;
dim3 blocksPerGrid(blocksPerGridX, blocksPerGridY, 1);

CUMALLOC((void **)&gpu_particleHasNoCharge,
particleHasNoCharge.size() * sizeof(bool));
CUMALLOC((void **)&gpu_particleUsed, atomCount * sizeof(bool));
CUMALLOC((void **)&gpu_startMol, startMol.size() * sizeof(int));
CUMALLOC((void **)&gpu_lengthMol, lengthMol.size() * sizeof(int));
Expand All @@ -404,8 +393,6 @@ void CallBoxForceReciprocalGPU(
sizeof(double) * particleCharge.size(), cudaMemcpyHostToDevice);
cudaMemcpy(gpu_particleMol, &particleMol[0], sizeof(int) * particleMol.size(),
cudaMemcpyHostToDevice);
cudaMemcpy(gpu_particleHasNoCharge, arr_particleHasNoCharge,
sizeof(bool) * particleHasNoCharge.size(), cudaMemcpyHostToDevice);
cudaMemcpy(gpu_particleUsed, particleUsed, sizeof(bool) * atomCount,
cudaMemcpyHostToDevice);
cudaMemcpy(vars->gpu_x, molCoords.x, sizeof(double) * atomCount,
Expand All @@ -425,16 +412,16 @@ void CallBoxForceReciprocalGPU(
BoxForceReciprocalGPU<<<blocksPerGrid, threadsPerBlock>>>(
vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz,
vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz,
vars->gpu_particleCharge, gpu_particleMol, gpu_particleHasNoCharge,
gpu_particleUsed, gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue,
imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box],
vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z,
vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box],
vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb,
vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x,
boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box, atomCount);
vars->gpu_particleCharge, gpu_particleMol, gpu_particleUsed, gpu_startMol,
gpu_lengthMol, alpha, alphaSq, constValue, imageSize,
vars->gpu_kxRef[box], vars->gpu_kyRef[box], vars->gpu_kzRef[box],
vars->gpu_x, vars->gpu_y, vars->gpu_z, vars->gpu_prefactRef[box],
vars->gpu_sumRnew[box], vars->gpu_sumInew[box], vars->gpu_isFraction,
vars->gpu_molIndex, vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box],
vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
boxAxes.GetAxis(box).x, boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z,
box, atomCount);
#ifndef NDEBUG
cudaDeviceSynchronize();
checkLastErrorCUDA(__FILE__, __LINE__);
Expand All @@ -456,8 +443,6 @@ void CallBoxForceReciprocalGPU(
#ifndef NDEBUG
cudaDeviceSynchronize();
#endif
delete[] arr_particleHasNoCharge;
CUFREE(gpu_particleHasNoCharge);
CUFREE(gpu_particleUsed);
CUFREE(gpu_startMol);
CUFREE(gpu_lengthMol);
Expand All @@ -467,16 +452,16 @@ void CallBoxForceReciprocalGPU(
__global__ void BoxForceReciprocalGPU(
double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
double *gpu_particleCharge, int *gpu_particleMol,
bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol,
int *gpu_lengthMol, double alpha, double alphaSq, double constValue,
int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz,
double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact,
double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction,
int *gpu_molIndex, double *gpu_lambdaCoulomb, double *gpu_cell_x,
double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, double axx,
double axy, double axz, int box, int atomCount) {
double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleUsed,
int *gpu_startMol, int *gpu_lengthMol, double alpha, double alphaSq,
double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
int *gpu_nonOrth, double axx, double axy, double axz, int box,
int atomCount) {
__shared__ double shared_kvector[IMAGES_PER_BLOCK * 3];
int particleID = blockDim.x * blockIdx.x + threadIdx.x;
int offset_vector_index = blockIdx.y * IMAGES_PER_BLOCK;
Expand All @@ -495,7 +480,7 @@ __global__ void BoxForceReciprocalGPU(
double forceX = 0.0, forceY = 0.0, forceZ = 0.0;
int moleculeID = gpu_particleMol[particleID];

if (gpu_particleHasNoCharge[particleID])
if (gpu_particleCharge[particleID] == 0.0)
return;

double x = gpu_x[particleID];
Expand Down
23 changes: 11 additions & 12 deletions src/GPU/CalculateEwaldCUDAKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ along with this program, also can be found at
void CallBoxForceReciprocalGPU(
VariablesCUDA *vars, XYZArray &atomForceRec, XYZArray &molForceRec,
const std::vector<double> &particleCharge,
const std::vector<int> &particleMol,
const std::vector<bool> &particleHasNoCharge, const bool *particleUsed,
const std::vector<int> &particleMol, const bool *particleUsed,
const std::vector<int> &startMol, const std::vector<int> &lengthMol,
double alpha, double alphaSq, double constValue, uint imageSize,
XYZArray const &molCoords, BoxDimensions const &boxAxes, int box);
Expand Down Expand Up @@ -59,16 +58,16 @@ void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize, uint box,
__global__ void BoxForceReciprocalGPU(
double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
double *gpu_particleCharge, int *gpu_particleMol,
bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol,
int *gpu_lengthMol, double alpha, double alphaSq, double constValue,
int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz,
double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact,
double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction,
int *gpu_molIndex, double *gpu_lambdaCoulomb, double *gpu_cell_x,
double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, double axx,
double axy, double axz, int box, int atomCount);
double *gpu_particleCharge, int *gpu_particleMol, bool *gpu_particleUsed,
int *gpu_startMol, int *gpu_lengthMol, double alpha, double alphaSq,
double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
int *gpu_nonOrth, double axx, double axy, double axz, int box,
int atomCount);

__global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
double *gpu_z, double *gpu_kx,
Expand Down
3 changes: 2 additions & 1 deletion src/GPU/ConstantDefinitionsCUDAKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ void UpdateInvCellBasisCUDA(VariablesCUDA *vars, uint box,

void UpdateEnergyVecs(VariablesCUDA *vars, int newVecLen, bool electrostatic) {
// If we haven't exceeded the previous maximum size, we can reuse the storage
if (vars->gpu_energyVecLen >= newVecLen) return;
if (vars->gpu_energyVecLen >= newVecLen)
return;

// Free the current allocations if this isn't the first allocation
if (vars->gpu_energyVecLen > 0) {
Expand Down
6 changes: 3 additions & 3 deletions src/GPU/VariablesCUDA.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ public:
gpu_mForcey = nullptr;
gpu_mForcez = nullptr;
gpu_startAtomIdx = nullptr;
gpu_energyVecLen = 0;
gpu_LJEn = nullptr;
gpu_REn = nullptr;
gpu_energyVecLen = 0;
gpu_LJEn = nullptr;
gpu_REn = nullptr;

// setting lambda values to nullptr
gpu_molIndex = nullptr;
Expand Down

0 comments on commit 0a19327

Please sign in to comment.