diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H index bb5172396c..9bdbf9366e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H @@ -263,6 +263,83 @@ void abec_gsrb (int i, int j, int k, int n, Array4 const& phi, Array4 +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void precompute_abec_gsrb (int i, int j, int k, int n, + Array4 const& pre_comp, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& m0, Array4 const& m2, + Array4 const& m4, + Array4 const& m1, Array4 const& m3, + Array4 const& m5, + Array4 const& f0, Array4 const& f2, + Array4 const& f4, + Array4 const& f1, Array4 const& f3, + Array4 const& f5, + Box const& vbox) noexcept +{ + constexpr T omega = T(1.15); + + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z,n) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k,n) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k,n) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z,n) : T(0.0); + + T gamma = alpha*a(i,j,k) + + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n)) + + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n)) + + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n)); + + T g_m_d = gamma + - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5)); + + T omega_dif_g_m_d = omega / g_m_d; + + pre_comp(i, j, k, 2*n) = gamma; + pre_comp(i, j, k, 2*n+1) = omega_dif_g_m_d; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_gsrb_v2 (int i, int j, int k, int n, Array4 const& phi, Array4 const& rhs, + Array4 const& pre_comp, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + + T gamma = pre_comp(i,j,k,2*n); + T omega_dif_g_m_d = pre_comp(i,j,k,2*n+1); + + T rho = dhx*( bX(i ,j,k,n)*phi(i-1,j,k,n) + + bX(i+1,j,k,n)*phi(i+1,j,k,n) ) + + dhy*( bY(i,j ,k,n)*phi(i,j-1,k,n) + + bY(i,j+1,k,n)*phi(i,j+1,k,n) ) + + dhz*( bZ(i,j,k ,n)*phi(i,j,k-1,n) + + bZ(i,j,k+1,n)*phi(i,j,k+1,n) ); + + T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho); + phi(i,j,k,n) = phi(i,j,k,n) + omega_dif_g_m_d * res; + } +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void abec_gsrb_os (int i, int j, int k, int n, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H index 5a7f926189..7b542e2fbf 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H @@ -155,6 +155,7 @@ public: [[nodiscard]] bool isSingular (int amrlev) const override { return m_is_singular[amrlev]; } [[nodiscard]] bool isBottomSingular () const override { return m_is_singular[0]; } void Fapply (int amrlev, int mglev, MF& out, const MF& in) const final; + void PrecompiteSmoothCoeffs (int amrlev, int mglev); void Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redblack) const final; void FFlux (int amrlev, const MFIter& mfi, const Array& flux, @@ -194,6 +195,7 @@ public: RT m_b_scalar = std::numeric_limits::quiet_NaN(); Vector > m_a_coeffs; Vector > > m_b_coeffs; + Vector > m_pre_comp; bool m_scalars_set = false; bool m_acoef_set = false; @@ -277,15 +279,22 @@ MLABecLaplacianT::define_ab_coeffs () { m_a_coeffs.resize(this->m_num_amr_levels); m_b_coeffs.resize(this->m_num_amr_levels); + m_pre_comp.resize(this->m_num_amr_levels); for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) { m_a_coeffs[amrlev].resize(this->m_num_mg_levels[amrlev]); m_b_coeffs[amrlev].resize(this->m_num_mg_levels[amrlev]); + m_pre_comp[amrlev].resize(this->m_num_mg_levels[amrlev]); for (int mglev = 0; mglev < this->m_num_mg_levels[amrlev]; ++mglev) { m_a_coeffs[amrlev][mglev].define (this->m_grids[amrlev][mglev], this->m_dmap[amrlev][mglev], - 1, 0, MFInfo(), *(this->m_factory[amrlev][mglev])); + 1, 0, MFInfo(), *(this->m_factory[amrlev][mglev])); + + const int pre_comp_nghost = 0; + m_pre_comp[amrlev][mglev].define + (this->m_grids[amrlev][mglev], this->m_dmap[amrlev][mglev], + 2, pre_comp_nghost, MFInfo(), *(this->m_factory[amrlev][mglev])); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { const BoxArray& ba = amrex::convert(this->m_grids[amrlev][mglev], @@ -436,6 +445,12 @@ MLABecLaplacianT::prepareForSolve () update_singular_flags(); + for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) { + for (int mglev = 0; mglev < this->m_num_mg_levels[amrlev]; ++mglev) { + PrecompiteSmoothCoeffs(amrlev, mglev); + } + } + m_needs_update = false; } @@ -874,6 +889,88 @@ MLABecLaplacianT::Fapply (int amrlev, int mglev, MF& out, const MF& in) cons } } +template +void +MLABecLaplacianT::PrecompiteSmoothCoeffs (int amrlev, int mglev) +{ + BL_PROFILE("MLABecLaplacian::PrecompiteSmoothCoeffs()"); + +#if (AMREX_SPACEDIM == 3) && defined(AMREX_USE_GPU) + bool regular_coarsening = true; + if (amrlev == 0 && mglev > 0) { + regular_coarsening = this->mg_coarsen_ratio_vec[mglev-1] == this->mg_coarsen_ratio; + } + + if (!this->m_use_gauss_seidel || this->m_overset_mask[amrlev][mglev] || + !Gpu::inLaunchRegion() || !regular_coarsening) { + return; + } + + const MF& acoef = m_a_coeffs[amrlev][mglev]; + AMREX_ALWAYS_ASSERT(acoef.nGrowVect() == 0); + + const auto& undrrelxr = this->m_undrrelxr[amrlev][mglev]; + const auto& maskvals = this->m_maskvals [amrlev][mglev]; + + auto& pre_comp = this->m_pre_comp[amrlev][mglev]; + + const int nc = this->getNComp(); + const Real* h = this->m_geom[amrlev][mglev].CellSize(); + AMREX_D_TERM(const RT dhx = m_b_scalar/static_cast(h[0]*h[0]);, + const RT dhy = m_b_scalar/static_cast(h[1]*h[1]);, + const RT dhz = m_b_scalar/static_cast(h[2]*h[2])); + const RT alpha = m_a_scalar; + + const auto& m0ma = maskvals[0].const_arrays(); + const auto& m1ma = maskvals[1].const_arrays(); +#if (AMREX_SPACEDIM > 1) + const auto& m2ma = maskvals[2].const_arrays(); + const auto& m3ma = maskvals[3].const_arrays(); +#if (AMREX_SPACEDIM > 2) + const auto& m4ma = maskvals[4].const_arrays(); + const auto& m5ma = maskvals[5].const_arrays(); +#endif +#endif + + const auto& ama = acoef.const_arrays(); + + AMREX_D_TERM(const auto& bxma = m_b_coeffs[amrlev][mglev][0].const_arrays();, + const auto& byma = m_b_coeffs[amrlev][mglev][1].const_arrays();, + const auto& bzma = m_b_coeffs[amrlev][mglev][2].const_arrays();); + + OrientationIter oitr; + + const auto& f0ma = undrrelxr[oitr()].const_arrays(); ++oitr; + const auto& f1ma = undrrelxr[oitr()].const_arrays(); ++oitr; +#if (AMREX_SPACEDIM > 1) + const auto& f2ma = undrrelxr[oitr()].const_arrays(); ++oitr; + const auto& f3ma = undrrelxr[oitr()].const_arrays(); ++oitr; +#if (AMREX_SPACEDIM > 2) + const auto& f4ma = undrrelxr[oitr()].const_arrays(); ++oitr; + const auto& f5ma = undrrelxr[oitr()].const_arrays(); ++oitr; +#endif +#endif + + const auto& pcma = pre_comp.arrays(); + + ParallelFor(acoef, IntVect(0), nc, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + Box vbx(ama[box_no]); + precompute_abec_gsrb(i,j,k,n, pcma[box_no], alpha, ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), + AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), + AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), + AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), + vbx); + }); + + pre_comp.FillBoundary(); +#endif +} + template void MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redblack) const @@ -923,6 +1020,8 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in #endif #endif + const auto& pre_comp = this->m_pre_comp[amrlev][mglev]; + const int nc = this->getNComp(); const Real* h = this->m_geom[amrlev][mglev].CellSize(); AMREX_D_TERM(const RT dhx = m_b_scalar/static_cast(h[0]*h[0]);, @@ -964,6 +1063,8 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in #endif #endif + const auto& pcma = pre_comp.const_arrays(); + if (this->m_overset_mask[amrlev][mglev]) { const auto& osmma = this->m_overset_mask[amrlev][mglev]->const_arrays(); if (this->m_use_gauss_seidel) { @@ -1002,6 +1103,12 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in ParallelFor(sol, IntVect(0), nc, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept { +#if (AMREX_SPACEDIM == 3) + abec_gsrb_v2(i,j,k,n, solnma[box_no], rhsma[box_no], pcma[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + redblack); +#else Box vbx(ama[box_no]); abec_gsrb(i,j,k,n, solnma[box_no], rhsma[box_no], alpha, ama[box_no], AMREX_D_DECL(dhx, dhy, dhz), @@ -1011,6 +1118,7 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), vbx, redblack); +#endif }); } else { const auto& axma = Ax.const_arrays(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index 64e8dc208c..e7d5f2659b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -1232,13 +1232,26 @@ MLCellLinOpT::smooth (int amrlev, int mglev, MF& sol, const MF& rhs, m_reuse_bc_tag_vector = true; + int ghost_cells_valid = skip_fillboundary ? 1 : 0; + for (int i = 0; i < niter; ++i) { for (int redblack = 0; redblack < 2; ++redblack) { + if (ghost_cells_valid == 0) { + const int ncomp = this->getNComp(); + const int cross = false; //isCrossStencil(); + const int ngrow = sol.nGrowVect().min(); + //std::cout << "ncomp " << ncomp << " ngrow " << ngrow << " cross " << cross << std::endl; + sol.FillBoundary(0, ncomp, IntVect(ngrow), this->m_geom[amrlev][mglev].periodicity(), cross); + ghost_cells_valid = ngrow; + //sol.nGrowVect().min(); + } + applyBC(amrlev, mglev, sol, BCMode::Homogeneous, StateMode::Solution, - nullptr, skip_fillboundary); + nullptr, true); Fsmooth(amrlev, mglev, sol, rhs, redblack); - skip_fillboundary = false; + + --ghost_cells_valid; } }