
Commit

clean ups, comments
gitpeterwind committed Dec 9, 2023
1 parent 9c447e6 commit 3aa5b89
Showing 3 changed files with 85 additions and 149 deletions.
2 changes: 1 addition & 1 deletion external/upstream/fetch_mrcpp.cmake
@@ -39,7 +39,7 @@ else()
GIT_REPOSITORY
https://github.com/MRChemSoft/mrcpp.git
GIT_TAG
-e491c456fff2552889c5ea500672a42f87e96ff9
+162f70722b604c11749b041f9780389da0447d21
)

FetchContent_GetProperties(mrcpp_sources)
53 changes: 50 additions & 3 deletions src/mrdft/Functional.cpp
@@ -155,12 +155,58 @@ Eigen::MatrixXd Functional::contract_transposed(Eigen::MatrixXd &xc_data, Eigen:
}


-/** @brief Make dft potential for a given NodeIndex
+/** @brief Evaluates XC functional and derivatives for a given NodeIndex
*
* The electronic densities (total/alpha/beta) are given as input.
* The values of the zeroth-order densities and their gradients are sent to xcfun.
* The output of xcfun must then be combined ("contracted") with the gradients
* of the higher-order densities.
*
* XCFunctional output (with k=1 and explicit derivatives):
*
* LDA: \f$ \left(F_{xc}, \frac{\partial F_{xc}}{\partial \rho}\right) \f$
*
* GGA: \f$ \left(F_{xc},
* \frac{\partial F_{xc}}{\partial \rho},
* \frac{\partial F_{xc}}{\partial \rho_x},
* \frac{\partial F_{xc}}{\partial \rho_y},
* \frac{\partial F_{xc}}{\partial \rho_z}\right) \f$
*
* Spin LDA: \f$ \left(F_{xc}, \frac{\partial F_{xc}}{\partial \rho^\alpha},
* \frac{\partial F_{xc}}{\partial \rho^\beta}\right) \f$
*
* Spin GGA: \f$ \left(F_{xc},
* \frac{\partial F_{xc}}{\partial \rho^\alpha},
* \frac{\partial F_{xc}}{\partial \rho^\beta},
* \frac{\partial F_{xc}}{\partial \rho_x^\alpha},
* \frac{\partial F_{xc}}{\partial \rho_y^\alpha},
* \frac{\partial F_{xc}}{\partial \rho_z^\alpha},
* \frac{\partial F_{xc}}{\partial \rho_x^\beta},
* \frac{\partial F_{xc}}{\partial \rho_y^\beta},
* \frac{\partial F_{xc}}{\partial \rho_z^\beta}
* \right) \f$
*
* XCFunctional output (with k=1 and gamma-type derivatives):
*
* GGA: \f$ \left(F_{xc},
* \frac{\partial F_{xc}}{\partial \rho},
* \frac{\partial F_{xc}}{\partial \gamma}\right) \f$
*
* Spin GGA: \f$ \left(F_{xc},
* \frac{\partial F_{xc}}{\partial \rho^\alpha},
* \frac{\partial F_{xc}}{\partial \rho^\beta },
* \frac{\partial F_{xc}}{\partial \gamma^{\alpha \alpha}},
* \frac{\partial F_{xc}}{\partial \gamma^{\alpha \beta }},
* \frac{\partial F_{xc}}{\partial \gamma^{\beta \beta }}
* \right) \f$
*
* @param[in] inp Input density functions
* @param[out] xcNodes Output function nodes (energy density and potential components)
*
*/
-void Functional::makepot(mrcpp::FunctionTreeVector<3> &inp, std::vector<mrcpp::FunctionNode<3> *> xcNodes) const{
+void Functional::makepot(mrcpp::FunctionTreeVector<3> &inp, std::vector<mrcpp::FunctionNode<3> *> xcNodes) const {
if (this->log_grad){
MSG_ERROR("log_grad not yet implemented");
MSG_ERROR("log_grad not implemented");
}

mrcpp::NodeIndex<3> nodeIdx = xcNodes[0]->getNodeIndex();
@@ -191,6 +237,7 @@ void Functional::makepot(mrcpp::FunctionTreeVector<3> &inp, std::vector<mrcpp::F
node.attachCoefs(xcfun_inp.col(spinsize + 3*i + d).data());

mrcpp::DerivativeCalculator<3> derivcalc(d, *this->derivOp, *rho);
// differentiate rho and put the result into xcfun_inp (aliased by node)
derivcalc.calcNode(rho->getNode(nodeIdx), node);
// make cv representation of gradient of density
node.mwTransform(mrcpp::Reconstruction);
179 changes: 34 additions & 145 deletions src/mrdft/MRDFT.cpp
@@ -65,172 +65,61 @@ mrcpp::FunctionTreeVector<3> MRDFT::evaluate(mrcpp::FunctionTreeVector<3> &inp)
mrcpp::Timer t_tot, t_pre;
grid().unify(inp);
int nCoefs = mrcpp::get_func(inp, 0).getEndFuncNode(0).getNCoefs();
int nOutCtr = functional().getCtrOutputLength();
int nFcs = functional().getXCOutputLength();

// Note: the other option will be removed after log_grad is implemented
if (not functional().log_grad) {
// Note: we do not need to make the complete energy density (PotVec[0]) on each MPI rank;
// instead we compute the energy directly.
mrcpp::Timer t_post;
int potvecSize = 2;
if (functional().isSpin()) potvecSize = 3;
mrcpp::FunctionTreeVector<3> PotVec = grid().generate(potvecSize);
int nNodes = grid().size();
int n_start = (mrcpp::mpi::wrk_rank * nNodes) / mrcpp::mpi::wrk_size;
int n_end = ((mrcpp::mpi::wrk_rank + 1) * nNodes) / mrcpp::mpi::wrk_size;
DoubleVector XCenergy = DoubleVector::Zero(1);
#pragma omp parallel
{
#pragma omp for schedule(guided)
for (int n = n_start; n < n_end; n++) {
vector<mrcpp::FunctionNode<3> *> xcNodes = xc_utils::fetch_nodes(n, PotVec);
functional().makepot(inp, xcNodes);
XCenergy[0] += xcNodes[0]->integrate();
}
}
// each MPI rank has only part of the results; all ranks send their results to the bank and then fetch
if(mrcpp::mpi::wrk_size > 1) {
// sum up the energy contributions from all MPI ranks
mrcpp::mpi::allreduce_vector(XCenergy, mrcpp::mpi::comm_wrk);
// send to bank
// note that this cannot be done in the omp loop above, because OpenMP threads cannot make MPI calls
mrcpp::BankAccount PotVecBank; // to put the PotVec
for (int n = n_start; n < n_end; n++) {
vector<mrcpp::FunctionNode<3> *> xcNodes = xc_utils::fetch_nodes(n, PotVec);
// NB: we do not distribute the energy density (i=0); it is not used, since we have XCenergy
for (int i = 1; i < potvecSize; i++)
PotVecBank.put_data(potvecSize*n+i, nCoefs, xcNodes[i]->getCoefs());
}
for (int n = 0; n < nNodes; n++) {
if(n >= n_start and n < n_end) continue; // no need to fetch own results
vector<mrcpp::FunctionNode<3> *> xcNodes = xc_utils::fetch_nodes(n, PotVec);
for (int i = 1; i < potvecSize; i++)
PotVecBank.get_data(potvecSize*n+i, nCoefs, xcNodes[i]->getCoefs());
}
}
functional().clear();
int outNodes = 0;
int outSize = 0;
for (int i = 1; i < PotVec.size(); i++) {
mrcpp::FunctionTree<3> &f_i = mrcpp::get_func(PotVec, i);
f_i.mwTransform(mrcpp::BottomUp);
f_i.calcSquareNorm();
outNodes += f_i.getNNodes();
outSize += f_i.getSizeNodes();
}
this->functional().XCenergy = XCenergy[0];
mrcpp::print::tree(3, "Make potential", outNodes, outSize, t_post.elapsed());
return PotVec;
} else {
// This block will be removed in the future
functional().preprocess(inp);
mrcpp::FunctionTreeVector<3> xcInpVec = functional().setupXCInput();
mrcpp::FunctionTreeVector<3> ctrInpVec = functional().setupCtrInput();

int inpNodes = 0;
int inpSize = 0;
for (auto i = 0; i < xcInpVec.size(); i++) {
auto &f_i = mrcpp::get_func(xcInpVec, i);
inpNodes += f_i.getNNodes();
inpSize += f_i.getSizeNodes();
}
mrcpp::print::tree(3, "Preprocess input", inpNodes, inpSize, t_pre.elapsed());
mrcpp::Timer t_eval;

// divide nNodes into parts assigned to each MPI rank
// Note: we do not need to make the complete energy density (PotVec[0]) on each MPI rank;
// instead we compute the energy directly.
mrcpp::Timer t_post;
int potvecSize = 2;
if (functional().isSpin()) potvecSize = 3;
mrcpp::FunctionTreeVector<3> PotVec = grid().generate(potvecSize);
int nNodes = grid().size();
// parallelization of the loop both with OpenMP (pragma omp for) and
// MPI (each rank handles the portion of the loop defined by n_start and n_end)
int n_start = (mrcpp::mpi::wrk_rank * nNodes) / mrcpp::mpi::wrk_size;
int n_end = ((mrcpp::mpi::wrk_rank + 1) * nNodes) / mrcpp::mpi::wrk_size;
std::vector<Eigen::MatrixXd> ctrOutDataVec(n_end - n_start);
mrcpp::FunctionTreeVector<3> ctrOutVec;
if (mrcpp::mpi::wrk_size == 1) ctrOutVec = grid().generate(nOutCtr);

DoubleVector XCenergy = DoubleVector::Zero(1);
#pragma omp parallel
{
#pragma omp for schedule(guided)
for (int n = n_start; n < n_end; n++) {
auto xcInpNodes = xc_utils::fetch_nodes(n, xcInpVec); // vector<mrcpp::FunctionNode<3> *>
auto xcInpData = xc_utils::compress_nodes(xcInpNodes); // Eigen::MatrixXd
auto xcOutData = functional().evaluate(xcInpData); // Eigen::MatrixXd
auto ctrInpNodes = xc_utils::fetch_nodes(n, ctrInpVec);
auto ctrInpData = xc_utils::compress_nodes(ctrInpNodes); // Eigen::MatrixXd
auto ctrOutData = functional().contract(xcOutData, ctrInpData); // Eigen::MatrixXd; contract() multiplies densities and functional derivatives

if (mrcpp::mpi::wrk_size > 1) {
// store the results temporarily
ctrOutDataVec[n - n_start] = std::move(ctrOutData);
} else {
// postprocess the results
auto ctrOutNodes = xc_utils::fetch_nodes(n, ctrOutVec);
xc_utils::expand_nodes(ctrOutNodes, ctrOutData);
}
vector<mrcpp::FunctionNode<3> *> xcNodes = xc_utils::fetch_nodes(n, PotVec);
functional().makepot(inp, xcNodes);
XCenergy[0] += xcNodes[0]->integrate();
}
}

// Input data is cleared before constructing the full output
mrcpp::clear(xcInpVec, false);
mrcpp::clear(ctrInpVec, false);

if (mrcpp::mpi::wrk_size > 1) {
// each MPI process has only a part of the results

ctrOutVec = grid().generate(nOutCtr);
mrcpp::BankAccount ctrOutBank; // to put the ctrOutDataVec;

// note that mpi cannot run in multiple omp threads
int size = nOutCtr * nCoefs;
for (int n = n_start; n < n_end; n++) ctrOutBank.put_data(n, size, ctrOutDataVec[n - n_start].data());
// fetch all nodes from bank and postprocess
// each MPI rank has only part of the results; all ranks send their results to the bank and then fetch
if(mrcpp::mpi::wrk_size > 1) {
// sum up the energy contributions from all MPI ranks
mrcpp::mpi::allreduce_vector(XCenergy, mrcpp::mpi::comm_wrk);
// send to bank; note that this cannot be done in the omp loop above,
// because OpenMP threads cannot make MPI calls
mrcpp::BankAccount PotVecBank; // to put the PotVec
for (int n = n_start; n < n_end; n++) {
vector<mrcpp::FunctionNode<3> *> xcNodes = xc_utils::fetch_nodes(n, PotVec);
// NB: we do not distribute the energy density (i=0); it is not used, since we have XCenergy
for (int i = 1; i < potvecSize; i++)
PotVecBank.put_data(potvecSize*n+i, nCoefs, xcNodes[i]->getCoefs());
}
for (int n = 0; n < nNodes; n++) {
Eigen::MatrixXd ctrOutData(nOutCtr, nCoefs);
ctrOutBank.get_data(n, size, ctrOutData.data());
auto ctrOutNodes = xc_utils::fetch_nodes(n, ctrOutVec);
xc_utils::expand_nodes(ctrOutNodes, ctrOutData);
if(n >= n_start and n < n_end) continue; // no need to fetch own results
vector<mrcpp::FunctionNode<3> *> xcNodes = xc_utils::fetch_nodes(n, PotVec);
for (int i = 1; i < potvecSize; i++)
PotVecBank.get_data(potvecSize*n+i, nCoefs, xcNodes[i]->getCoefs());
}
}

// Reconstruct raw xcfun output functions
/*
for (auto i = 0; i < xcOutVec.size(); i++) {
auto &f_i = mrcpp::get_func(xcOutVec, i);
f_i.mwTransform(mrcpp::BottomUp);
f_i.calcSquareNorm();
}
mrcpp::clear(xcOutVec, true);
*/

// Reconstruct contracted output functions
int ctrNodes = 0;
int ctrSize = 0;
for (auto i = 0; i < ctrOutVec.size(); i++) {
auto &f_i = mrcpp::get_func(ctrOutVec, i);
ctrNodes += f_i.getNNodes();
ctrSize += f_i.getSizeNodes();
f_i.mwTransform(mrcpp::BottomUp);
f_i.calcSquareNorm();
}
mrcpp::print::tree(3, "Evaluate functional", ctrNodes, ctrSize, t_eval.elapsed());
mrcpp::Timer t_post;
auto potOutVec = functional().postprocess(ctrOutVec);
mrcpp::clear(ctrOutVec, true);
this->functional().XCenergy = XCenergy[0];
functional().clear();

int outNodes = 0;
int outSize = 0;

for (auto i = 0; i < potOutVec.size(); i++) {
auto &f_i = mrcpp::get_func(potOutVec, i);
for (int i = 1; i < PotVec.size(); i++) {
mrcpp::FunctionTree<3> &f_i = mrcpp::get_func(PotVec, i);
f_i.mwTransform(mrcpp::BottomUp);
f_i.calcSquareNorm();
// TODO? insert a crop
outNodes += f_i.getNNodes();
outSize += f_i.getSizeNodes();
}

mrcpp::print::tree(3, "Postprocess potential", outNodes, outSize, t_post.elapsed());
return potOutVec;
}
mrcpp::print::tree(3, "Make potential", outNodes, outSize, t_post.elapsed());
return PotVec;
}

} // namespace mrdft
