Skip to content

Commit

Permalink
add abec_gsrb_v2
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderSinn committed Feb 21, 2025
1 parent 1ed47dd commit 36a4b87
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 3 deletions.
77 changes: 77 additions & 0 deletions Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,83 @@ void abec_gsrb (int i, int j, int k, int n, Array4<T> const& phi, Array4<T const
}
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void precompute_abec_gsrb (int i, int j, int k, int n,
Array4<T> const& pre_comp,
T alpha, Array4<T const> const& a,
T dhx, T dhy, T dhz,
Array4<T const> const& bX, Array4<T const> const& bY,
Array4<T const> const& bZ,
Array4<int const> const& m0, Array4<int const> const& m2,
Array4<int const> const& m4,
Array4<int const> const& m1, Array4<int const> const& m3,
Array4<int const> const& m5,
Array4<T const> const& f0, Array4<T const> const& f2,
Array4<T const> const& f4,
Array4<T const> const& f1, Array4<T const> const& f3,
Array4<T const> const& f5,
Box const& vbox) noexcept
{
constexpr T omega = T(1.15);

const auto vlo = amrex::lbound(vbox);
const auto vhi = amrex::ubound(vbox);

T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
? f0(vlo.x,j,k,n) : T(0.0);
T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
? f1(i,vlo.y,k,n) : T(0.0);
T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
? f2(i,j,vlo.z,n) : T(0.0);
T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
? f3(vhi.x,j,k,n) : T(0.0);
T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
? f4(i,vhi.y,k,n) : T(0.0);
T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
? f5(i,j,vhi.z,n) : T(0.0);

T gamma = alpha*a(i,j,k)
+ dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
+ dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
+ dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));

T g_m_d = gamma
- (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
+ dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
+ dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));

T omega_dif_g_m_d = omega / g_m_d;

pre_comp(i, j, k, 2*n) = gamma;
pre_comp(i, j, k, 2*n+1) = omega_dif_g_m_d;
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void abec_gsrb_v2 (int i, int j, int k, int n, Array4<T> const& phi, Array4<T const> const& rhs,
Array4<T const> const& pre_comp,
T dhx, T dhy, T dhz,
Array4<T const> const& bX, Array4<T const> const& bY,
Array4<T const> const& bZ, int redblack) noexcept
{
if ((i+j+k+redblack)%2 == 0) {

T gamma = pre_comp(i,j,k,2*n);
T omega_dif_g_m_d = pre_comp(i,j,k,2*n+1);

T rho = dhx*( bX(i ,j,k,n)*phi(i-1,j,k,n)
+ bX(i+1,j,k,n)*phi(i+1,j,k,n) )
+ dhy*( bY(i,j ,k,n)*phi(i,j-1,k,n)
+ bY(i,j+1,k,n)*phi(i,j+1,k,n) )
+ dhz*( bZ(i,j,k ,n)*phi(i,j,k-1,n)
+ bZ(i,j,k+1,n)*phi(i,j,k+1,n) );

T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho);
phi(i,j,k,n) = phi(i,j,k,n) + omega_dif_g_m_d * res;
}
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void abec_gsrb_os (int i, int j, int k, int n,
Expand Down
110 changes: 109 additions & 1 deletion Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ public:
[[nodiscard]] bool isSingular (int amrlev) const override { return m_is_singular[amrlev]; }
[[nodiscard]] bool isBottomSingular () const override { return m_is_singular[0]; }
void Fapply (int amrlev, int mglev, MF& out, const MF& in) const final;
void PrecompiteSmoothCoeffs (int amrlev, int mglev);
void Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redblack) const final;
void FFlux (int amrlev, const MFIter& mfi,
const Array<FAB*,AMREX_SPACEDIM>& flux,
Expand Down Expand Up @@ -194,6 +195,7 @@ public:
RT m_b_scalar = std::numeric_limits<RT>::quiet_NaN();
Vector<Vector<MF> > m_a_coeffs;
Vector<Vector<Array<MF,AMREX_SPACEDIM> > > m_b_coeffs;
Vector<Vector<MF> > m_pre_comp;

bool m_scalars_set = false;
bool m_acoef_set = false;
Expand Down Expand Up @@ -277,15 +279,22 @@ MLABecLaplacianT<MF>::define_ab_coeffs ()
{
m_a_coeffs.resize(this->m_num_amr_levels);
m_b_coeffs.resize(this->m_num_amr_levels);
m_pre_comp.resize(this->m_num_amr_levels);
for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev)
{
m_a_coeffs[amrlev].resize(this->m_num_mg_levels[amrlev]);
m_b_coeffs[amrlev].resize(this->m_num_mg_levels[amrlev]);
m_pre_comp[amrlev].resize(this->m_num_mg_levels[amrlev]);
for (int mglev = 0; mglev < this->m_num_mg_levels[amrlev]; ++mglev)
{
m_a_coeffs[amrlev][mglev].define
(this->m_grids[amrlev][mglev], this->m_dmap[amrlev][mglev],
1, 0, MFInfo(), *(this->m_factory[amrlev][mglev]));
1, 0, MFInfo(), *(this->m_factory[amrlev][mglev]));

const int pre_comp_nghost = 0;
m_pre_comp[amrlev][mglev].define
(this->m_grids[amrlev][mglev], this->m_dmap[amrlev][mglev],
2, pre_comp_nghost, MFInfo(), *(this->m_factory[amrlev][mglev]));
for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
{
const BoxArray& ba = amrex::convert(this->m_grids[amrlev][mglev],
Expand Down Expand Up @@ -436,6 +445,12 @@ MLABecLaplacianT<MF>::prepareForSolve ()

update_singular_flags();

for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) {
for (int mglev = 0; mglev < this->m_num_mg_levels[amrlev]; ++mglev) {
PrecompiteSmoothCoeffs(amrlev, mglev);
}
}

m_needs_update = false;
}

Expand Down Expand Up @@ -874,6 +889,88 @@ MLABecLaplacianT<MF>::Fapply (int amrlev, int mglev, MF& out, const MF& in) cons
}
}

template <typename MF>
void
MLABecLaplacianT<MF>::PrecompiteSmoothCoeffs (int amrlev, int mglev)
{
BL_PROFILE("MLABecLaplacian::PrecompiteSmoothCoeffs()");

#if (AMREX_SPACEDIM == 3) && defined(AMREX_USE_GPU)
bool regular_coarsening = true;
if (amrlev == 0 && mglev > 0) {
regular_coarsening = this->mg_coarsen_ratio_vec[mglev-1] == this->mg_coarsen_ratio;
}

if (!this->m_use_gauss_seidel || this->m_overset_mask[amrlev][mglev] ||
!Gpu::inLaunchRegion() || !regular_coarsening) {
return;
}

const MF& acoef = m_a_coeffs[amrlev][mglev];
AMREX_ALWAYS_ASSERT(acoef.nGrowVect() == 0);

const auto& undrrelxr = this->m_undrrelxr[amrlev][mglev];
const auto& maskvals = this->m_maskvals [amrlev][mglev];

auto& pre_comp = this->m_pre_comp[amrlev][mglev];

const int nc = this->getNComp();
const Real* h = this->m_geom[amrlev][mglev].CellSize();
AMREX_D_TERM(const RT dhx = m_b_scalar/static_cast<RT>(h[0]*h[0]);,
const RT dhy = m_b_scalar/static_cast<RT>(h[1]*h[1]);,
const RT dhz = m_b_scalar/static_cast<RT>(h[2]*h[2]));
const RT alpha = m_a_scalar;

const auto& m0ma = maskvals[0].const_arrays();
const auto& m1ma = maskvals[1].const_arrays();
#if (AMREX_SPACEDIM > 1)
const auto& m2ma = maskvals[2].const_arrays();
const auto& m3ma = maskvals[3].const_arrays();
#if (AMREX_SPACEDIM > 2)
const auto& m4ma = maskvals[4].const_arrays();
const auto& m5ma = maskvals[5].const_arrays();
#endif
#endif

const auto& ama = acoef.const_arrays();

AMREX_D_TERM(const auto& bxma = m_b_coeffs[amrlev][mglev][0].const_arrays();,
const auto& byma = m_b_coeffs[amrlev][mglev][1].const_arrays();,
const auto& bzma = m_b_coeffs[amrlev][mglev][2].const_arrays(););

OrientationIter oitr;

const auto& f0ma = undrrelxr[oitr()].const_arrays(); ++oitr;
const auto& f1ma = undrrelxr[oitr()].const_arrays(); ++oitr;
#if (AMREX_SPACEDIM > 1)
const auto& f2ma = undrrelxr[oitr()].const_arrays(); ++oitr;
const auto& f3ma = undrrelxr[oitr()].const_arrays(); ++oitr;
#if (AMREX_SPACEDIM > 2)
const auto& f4ma = undrrelxr[oitr()].const_arrays(); ++oitr;
const auto& f5ma = undrrelxr[oitr()].const_arrays(); ++oitr;
#endif
#endif

const auto& pcma = pre_comp.arrays();

ParallelFor(acoef, IntVect(0), nc,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
Box vbx(ama[box_no]);
precompute_abec_gsrb(i,j,k,n, pcma[box_no], alpha, ama[box_no],
AMREX_D_DECL(dhx, dhy, dhz),
AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]),
AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]),
AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]),
AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]),
AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]),
vbx);
});

pre_comp.FillBoundary();
#endif
}

template <typename MF>
void
MLABecLaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redblack) const
Expand Down Expand Up @@ -923,6 +1020,8 @@ MLABecLaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in
#endif
#endif

const auto& pre_comp = this->m_pre_comp[amrlev][mglev];

const int nc = this->getNComp();
const Real* h = this->m_geom[amrlev][mglev].CellSize();
AMREX_D_TERM(const RT dhx = m_b_scalar/static_cast<RT>(h[0]*h[0]);,
Expand Down Expand Up @@ -964,6 +1063,8 @@ MLABecLaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in
#endif
#endif

const auto& pcma = pre_comp.const_arrays();

if (this->m_overset_mask[amrlev][mglev]) {
const auto& osmma = this->m_overset_mask[amrlev][mglev]->const_arrays();
if (this->m_use_gauss_seidel) {
Expand Down Expand Up @@ -1002,6 +1103,12 @@ MLABecLaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in
ParallelFor(sol, IntVect(0), nc,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
#if (AMREX_SPACEDIM == 3)
abec_gsrb_v2(i,j,k,n, solnma[box_no], rhsma[box_no], pcma[box_no],
AMREX_D_DECL(dhx, dhy, dhz),
AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]),
redblack);
#else
Box vbx(ama[box_no]);
abec_gsrb(i,j,k,n, solnma[box_no], rhsma[box_no], alpha, ama[box_no],
AMREX_D_DECL(dhx, dhy, dhz),
Expand All @@ -1011,6 +1118,7 @@ MLABecLaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in
AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]),
AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]),
vbx, redblack);
#endif
});
} else {
const auto& axma = Ax.const_arrays();
Expand Down
17 changes: 15 additions & 2 deletions Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
Original file line number Diff line number Diff line change
Expand Up @@ -1232,13 +1232,26 @@ MLCellLinOpT<MF>::smooth (int amrlev, int mglev, MF& sol, const MF& rhs,

m_reuse_bc_tag_vector = true;

int ghost_cells_valid = skip_fillboundary ? 1 : 0;

for (int i = 0; i < niter; ++i) {
for (int redblack = 0; redblack < 2; ++redblack)
{
if (ghost_cells_valid == 0) {
const int ncomp = this->getNComp();
const int cross = false; //isCrossStencil();
const int ngrow = sol.nGrowVect().min();
//std::cout << "ncomp " << ncomp << " ngrow " << ngrow << " cross " << cross << std::endl;
sol.FillBoundary(0, ncomp, IntVect(ngrow), this->m_geom[amrlev][mglev].periodicity(), cross);
ghost_cells_valid = ngrow;
//sol.nGrowVect().min();
}

applyBC(amrlev, mglev, sol, BCMode::Homogeneous, StateMode::Solution,
nullptr, skip_fillboundary);
nullptr, true);
Fsmooth(amrlev, mglev, sol, rhs, redblack);
skip_fillboundary = false;

--ghost_cells_valid;
}
}

Expand Down

0 comments on commit 36a4b87

Please sign in to comment.