From 12eaba99b7ad4bbc9fe28f07d72d98373e9c9ab2 Mon Sep 17 00:00:00 2001 From: Alexander Sinn <64009254+AlexanderSinn@users.noreply.github.com> Date: Thu, 22 Feb 2024 18:33:08 +0100 Subject: [PATCH] Fix hpmg for some non-ideal resolutions (#1062) * fix hpmg for some non ideal resolutions * merge dev --- src/mg_solver/HpMultiGrid.H | 2 + src/mg_solver/HpMultiGrid.cpp | 125 +++++++++++++++++++--------------- 2 files changed, 72 insertions(+), 55 deletions(-) diff --git a/src/mg_solver/HpMultiGrid.H b/src/mg_solver/HpMultiGrid.H index b9e90ff152..5f08ce0692 100644 --- a/src/mg_solver/HpMultiGrid.H +++ b/src/mg_solver/HpMultiGrid.H @@ -174,6 +174,8 @@ private: int m_num_mg_levels; /** Number of single-block-kernel levels */ int m_num_single_block_levels; + /** If the single block kernel should be used */ + bool m_use_single_block_kernel = true; /** Alias to the solution argument passed in solve() */ amrex::FArrayBox m_sol; diff --git a/src/mg_solver/HpMultiGrid.cpp b/src/mg_solver/HpMultiGrid.cpp index 5afdefb128..f0a5f969f0 100644 --- a/src/mg_solver/HpMultiGrid.cpp +++ b/src/mg_solver/HpMultiGrid.cpp @@ -693,6 +693,16 @@ MultiGrid::MultiGrid (Real dx, Real dy, Box a_domain) { return b.volume() <= n_cell_single*n_cell_single; }); m_single_block_level_begin = std::distance(std::begin(m_domain), r); m_single_block_level_begin = std::max(1, m_single_block_level_begin); + if (m_single_block_level_begin > m_max_level) { + m_single_block_level_begin = m_max_level; + m_use_single_block_kernel = false; + amrex::Print() << "hpmg: WARNING domain of size " + << a_domain_len[0] << " " << a_domain_len[1] + << " cannot be coarsened enough times to be solved efficiently.\n" + << "hpmg: Size of the final MG level: " + << m_domain[m_max_level].length(0) << " " << m_domain[m_max_level].length(1) << ".\n" + << "hpmg: Please consider using a domain size of the form '2^n', '3*2^n', '2^n+1' or '3*n^2+1'.\n"; + } #else m_single_block_level_begin = m_max_level; #endif @@ -1099,62 +1109,67 @@ MultiGrid::bottomsolve () Real dx0 = m_dx * fac; Real dy0 = m_dy * fac; #if defined(AMREX_USE_GPU) - Array4 const* acf = m_acf_a; - Array4 const* res = m_res_a; - Array4 const* cor = m_cor_a; - Array4 const* rescor = m_rescor_a; - int nlevs = m_num_single_block_levels; - int const corner_offset = m_domain[0].cellCentered() ? 0 : 1; - - if (m_system_type == 1) { - bottomsolve_gpu(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset, - [=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi, - Array4 const& phi, Real rhs0, Real rhs1, - Array4 const& acf, Real facx, Real facy) - { - Real a = acf(i,j,0); - gs1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy); - gs1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy); - }, - [=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1, - int ilo, int jlo, int ihi, int jhi, - Array4 const& phi, Real rhs0, Real rhs1, - Array4 const& acf, Real facx, Real facy) - { - Real a = acf(i,j,0); - res0 = residual1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy); - res1 = residual1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy); - }); - } else { - bottomsolve_gpu(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset, - [=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi, - Array4 const& phi, Real rhs0, Real rhs1, - Array4 const& acf, Real facx, Real facy) - { - Real ar = acf(i,j,0,0); - Real ai = acf(i,j,0,1); - gs2(i, j, ilo, jlo, ihi, jhi, phi, rhs0, rhs1, ar, ai, facx, facy); - }, - [=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1, - int ilo, int jlo, int ihi, int jhi, - Array4 const& phi, Real rhs_r, Real rhs_i, - Array4 const& acf, Real facx, Real facy) - { - Real ar = acf(i,j,0,0); - Real ai = acf(i,j,0,1); - res0 = residual2r(i, j, ilo, jlo, ihi, jhi, phi, rhs_r, ar, ai, facx, facy); - res1 = residual2i(i, j, ilo, jlo, ihi, jhi, phi, rhs_i, ar, ai, facx, facy); - }); - } -#else - const int ilev = m_single_block_level_begin; - m_cor[ilev].setVal(Real(0.)); - for (int is = 0; is < nsweeps; ++is) { - gsrb(is, m_domain[ilev], m_cor[ilev].array(), - m_res[ilev].const_array(), m_acf[ilev].const_array(), dx0, dy0, - m_system_type); - } + if (m_use_single_block_kernel) { + Array4 const* acf = m_acf_a; + Array4 const* res = m_res_a; + Array4 const* cor = m_cor_a; + Array4 const* rescor = m_rescor_a; + int nlevs = m_num_single_block_levels; + int const corner_offset = m_domain[0].cellCentered() ? 0 : 1; + + if (m_system_type == 1) { + bottomsolve_gpu(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset, + [=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi, + Array4 const& phi, Real rhs0, Real rhs1, + Array4 const& acf, Real facx, Real facy) + { + Real a = acf(i,j,0); + gs1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy); + gs1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy); + }, + [=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1, + int ilo, int jlo, int ihi, int jhi, + Array4 const& phi, Real rhs0, Real rhs1, + Array4 const& acf, Real facx, Real facy) + { + Real a = acf(i,j,0); + res0 = residual1(i, j, 0, ilo, jlo, ihi, jhi, phi, rhs0, a, facx, facy); + res1 = residual1(i, j, 1, ilo, jlo, ihi, jhi, phi, rhs1, a, facx, facy); + }); + } else { + bottomsolve_gpu(dx0, dy0, acf, res, cor, rescor, nlevs, corner_offset, + [=] AMREX_GPU_DEVICE (int i, int j, int ilo, int jlo, int ihi, int jhi, + Array4 const& phi, Real rhs0, Real rhs1, + Array4 const& acf, Real facx, Real facy) + { + Real ar = acf(i,j,0,0); + Real ai = acf(i,j,0,1); + gs2(i, j, ilo, jlo, ihi, jhi, phi, rhs0, rhs1, ar, ai, facx, facy); + }, + [=] AMREX_GPU_DEVICE (int i, int j, Real& res0, Real& res1, + int ilo, int jlo, int ihi, int jhi, + Array4 const& phi, Real rhs_r, Real rhs_i, + Array4 const& acf, Real facx, Real facy) + { + Real ar = acf(i,j,0,0); + Real ai = acf(i,j,0,1); + res0 = residual2r(i, j, ilo, jlo, ihi, jhi, phi, rhs_r, ar, ai, facx, facy); + res1 = residual2i(i, j, ilo, jlo, ihi, jhi, phi, rhs_i, ar, ai, facx, facy); + }); + } + } else #endif + { + const int ilev = m_single_block_level_begin; + m_cor[ilev].setVal(Real(0.)); + // Use numsweeps equal to the box length rounded up to an even number for large boxes + int numsweeps = std::max(nsweeps, (m_cor[ilev].box().length().max() + 1) / 2 * 2); + for (int is = 0; is < numsweeps; ++is) { + gsrb(is, m_domain[ilev], m_cor[ilev].array(), + m_res[ilev].const_array(), m_acf[ilev].const_array(), dx0, dy0, + m_system_type); + } + } } #if defined(AMREX_USE_GPU)