Skip to content

Commit

Permalink
Fix hpmg for some non-ideal resolutions (#1062)
Browse files Browse the repository at this point in the history
* fix hpmg for some non ideal resolutions

* merge dev
  • Loading branch information
AlexanderSinn authored Feb 22, 2024
1 parent 4a257f4 commit 12eaba9
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 55 deletions.
2 changes: 2 additions & 0 deletions src/mg_solver/HpMultiGrid.H
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ private:
int m_num_mg_levels;
/** Number of single-block-kernel levels */
int m_num_single_block_levels;
/** If the single block kernel should be used */
bool m_use_single_block_kernel = true;

/** Alias to the solution argument passed in solve() */
amrex::FArrayBox m_sol;
Expand Down
125 changes: 70 additions & 55 deletions src/mg_solver/HpMultiGrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,16 @@ MultiGrid::MultiGrid (Real dx, Real dy, Box a_domain)
{ return b.volume() <= n_cell_single*n_cell_single; });
m_single_block_level_begin = std::distance(std::begin(m_domain), r);
m_single_block_level_begin = std::max(1, m_single_block_level_begin);
if (m_single_block_level_begin > m_max_level) {
m_single_block_level_begin = m_max_level;
m_use_single_block_kernel = false;
amrex::Print() << "hpmg: WARNING domain of size "
<< a_domain_len[0] << " " << a_domain_len[1]
<< " cannot be coarsened enough times to be solved efficiently.\n"
<< "hpmg: Size of the final MG level: "
<< m_domain[m_max_level].length(0) << " " << m_domain[m_max_level].length(1) << ".\n"
<< "hpmg: Please consider using a domain size of the form '2^n', '3*2^n', '2^n+1' or '3*n^2+1'.\n";
}
#else
m_single_block_level_begin = m_max_level;
#endif
Expand Down Expand Up @@ -1099,62 +1109,67 @@ MultiGrid::bottomsolve ()
Real dx0 = m_dx * fac;
Real dy0 = m_dy * fac;
#if defined(AMREX_USE_GPU)
Array4<amrex::Real> const* acf = m_acf_a;
Array4<amrex::Real> const* res = m_res_a;
Array4<amrex::Real> const* cor = m_cor_a;
Array4<amrex::Real> const* rescor = m_rescor_a;
int nlevs = m_num_single_block_levels;
int const corner_offset = m_domain[0].cellCentered() ? 0 : 1;

if (m_system_type == 1) {
bottomsolve_gpu<nsweeps>(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset,
[=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs0, Real rhs1,
Array4<Real> const& acf, Real facx, Real facy)
{
Real a = acf(i,j,0);
gs1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy);
gs1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy);
},
[=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1,
int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs0, Real rhs1,
Array4<Real> const& acf, Real facx, Real facy)
{
Real a = acf(i,j,0);
res0 = residual1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy);
res1 = residual1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy);
});
} else {
bottomsolve_gpu<nsweeps>(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset,
[=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs0, Real rhs1,
Array4<Real> const& acf, Real facx, Real facy)
{
Real ar = acf(i,j,0,0);
Real ai = acf(i,j,0,1);
gs2(i, j, ilo, jlo, ihi, jhi, phi, rhs0, rhs1, ar, ai, facx, facy);
},
[=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1,
int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs_r, Real rhs_i,
Array4<Real> const& acf, Real facx, Real facy)
{
Real ar = acf(i,j,0,0);
Real ai = acf(i,j,0,1);
res0 = residual2r(i, j, ilo, jlo, ihi, jhi, phi, rhs_r, ar, ai, facx, facy);
res1 = residual2i(i, j, ilo, jlo, ihi, jhi, phi, rhs_i, ar, ai, facx, facy);
});
}
#else
const int ilev = m_single_block_level_begin;
m_cor[ilev].setVal(Real(0.));
for (int is = 0; is < nsweeps; ++is) {
gsrb(is, m_domain[ilev], m_cor[ilev].array(),
m_res[ilev].const_array(), m_acf[ilev].const_array(), dx0, dy0,
m_system_type);
}
if (m_use_single_block_kernel) {
Array4<amrex::Real> const* acf = m_acf_a;
Array4<amrex::Real> const* res = m_res_a;
Array4<amrex::Real> const* cor = m_cor_a;
Array4<amrex::Real> const* rescor = m_rescor_a;
int nlevs = m_num_single_block_levels;
int const corner_offset = m_domain[0].cellCentered() ? 0 : 1;

if (m_system_type == 1) {
bottomsolve_gpu<nsweeps>(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset,
[=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs0, Real rhs1,
Array4<Real> const& acf, Real facx, Real facy)
{
Real a = acf(i,j,0);
gs1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy);
gs1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy);
},
[=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1,
int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs0, Real rhs1,
Array4<Real> const& acf, Real facx, Real facy)
{
Real a = acf(i,j,0);
res0 = residual1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy);
res1 = residual1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy);
});
} else {
bottomsolve_gpu<nsweeps>(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset,
[=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs0, Real rhs1,
Array4<Real> const& acf, Real facx, Real facy)
{
Real ar = acf(i,j,0,0);
Real ai = acf(i,j,0,1);
gs2(i, j, ilo, jlo, ihi, jhi, phi, rhs0, rhs1, ar, ai, facx, facy);
},
[=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1,
int ilo, int jlo, int ihi, int jhi,
Array4<Real> const& phi, Real rhs_r, Real rhs_i,
Array4<Real> const& acf, Real facx, Real facy)
{
Real ar = acf(i,j,0,0);
Real ai = acf(i,j,0,1);
res0 = residual2r(i, j, ilo, jlo, ihi, jhi, phi, rhs_r, ar, ai, facx, facy);
res1 = residual2i(i, j, ilo, jlo, ihi, jhi, phi, rhs_i, ar, ai, facx, facy);
});
}
} else
#endif
{
const int ilev = m_single_block_level_begin;
m_cor[ilev].setVal<amrex::RunOn::Device>(Real(0.));
// Use numsweeps equal to the box length rounded up to an even number for large boxes
int numsweeps = std::max(nsweeps, (m_cor[ilev].box().length().max() + 1) / 2 * 2);
for (int is = 0; is < numsweeps; ++is) {
gsrb(is, m_domain[ilev], m_cor[ilev].array(),
m_res[ilev].const_array(), m_acf[ilev].const_array(), dx0, dy0,
m_system_type);
}
}
}

#if defined(AMREX_USE_GPU)
Expand Down

0 comments on commit 12eaba9

Please sign in to comment.