From 89352170b8a993f7d9e67bf2369da541c8502470 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 29 Sep 2023 17:12:07 -0700 Subject: [PATCH 001/186] Documentation: Fix default values (#3562) Boolean flag `amrex.use_gpu_aware_mpi` has `int` as its C++ type. So its default value should be 0 not False. For some other boolean flags with `bool` as their C++ type, the default value should be `false`, not `False`. --- Docs/sphinx_documentation/source/GPU.rst | 12 ++++++------ Docs/sphinx_documentation/source/InputsPlotFiles.rst | 2 +- Docs/sphinx_documentation/source/LinearSolvers.rst | 2 +- Docs/sphinx_documentation/source/Particle.rst | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index 1391015f31e..56d540e63c4 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -1738,14 +1738,14 @@ by "amrex" in your :cpp:`inputs` file. +----------------------------+-----------------------------------------------------------------------+-------------+----------+ | | Description | Type | Default | +============================+=======================================================================+=============+==========+ -| use_gpu_aware_mpi | Whether to use GPU memory for communication buffers during MPI calls. | Bool | False | -| | If true, the buffers will use device memory. If false, they will use | | | -| | pinned memory. In practice, we find it is usually not worth it to use | | | -| | GPU aware MPI. | | | +| use_gpu_aware_mpi | Whether to use GPU memory for communication buffers during MPI calls. | Bool | 0 | +| | If true, the buffers will use device memory. If false (i.e., 0), they | | | +| | will use pinned memory. In practice, we find it is not always worth | | | +| | it to use GPU aware MPI. | | | +----------------------------+-----------------------------------------------------------------------+-------------+----------+ -| abort_on_out_of_gpu_memory | If the size of free memory on the GPU is less than the size of a | Bool | False | +| abort_on_out_of_gpu_memory | If the size of free memory on the GPU is less than the size of a | Bool | 0 | | | requested allocation, AMReX will call AMReX::Abort() with an error | | | | | describing how much free memory there is and what was requested. | | | +----------------------------+-----------------------------------------------------------------------+-------------+----------+ -| the_arena_is_managed | Whether :cpp:`The_Arena()` allocates managed memory. | Bool | False | +| the_arena_is_managed | Whether :cpp:`The_Arena()` allocates managed memory. 
| Bool | 0 | +----------------------------+-----------------------------------------------------------------------+-------------+----------+ diff --git a/Docs/sphinx_documentation/source/InputsPlotFiles.rst b/Docs/sphinx_documentation/source/InputsPlotFiles.rst index 64b202ebf5c..9e8789a90ac 100644 --- a/Docs/sphinx_documentation/source/InputsPlotFiles.rst +++ b/Docs/sphinx_documentation/source/InputsPlotFiles.rst @@ -12,7 +12,7 @@ as whether a plotfile should be written out immediately after restarting a simul | plot_int | Frequency of plotfile output; | Int | -1 | | | if -1 then no plotfiles will be written | | | +---------------------+-----------------------------------------------------------------------+-------------+-----------+ -| plotfile_on_restart | Should we write a plotfile when we restart (only used if plot_int>0) | Bool | False | +| plotfile_on_restart | Should we write a plotfile when we restart (only used if plot_int>0) | Bool | 0 (false) | +---------------------+-----------------------------------------------------------------------+-------------+-----------+ | plot_file | Prefix to use for plotfile output | String | plt | +---------------------+-----------------------------------------------------------------------+-------------+-----------+ diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst b/Docs/sphinx_documentation/source/LinearSolvers.rst index 87048bc8195..e7266f02243 100644 --- a/Docs/sphinx_documentation/source/LinearSolvers.rst +++ b/Docs/sphinx_documentation/source/LinearSolvers.rst @@ -565,7 +565,7 @@ The following parameter should be set to True if the problem to be solved has a In this case, the solution is only defined to within a constant. Setting this parameter to True replaces one row in the matrix sent to hypre from AMReX by a row that sets the value at one cell to 0. -- :cpp:`hypre.adjust_singular_matrix`: Default is False. +- :cpp:`hypre.adjust_singular_matrix`: Default is false. The following parameters can be set in the inputs file to control the choice of preconditioner and smoother: diff --git a/Docs/sphinx_documentation/source/Particle.rst b/Docs/sphinx_documentation/source/Particle.rst index da5fabb02b2..be8292c772b 100644 --- a/Docs/sphinx_documentation/source/Particle.rst +++ b/Docs/sphinx_documentation/source/Particle.rst @@ -713,7 +713,7 @@ with OpenMP, the first thing to look at is whether there are enough tiles availa +-------------------+-----------------------------------------------------------------------+-------------+-------------+ | | Description | Type | Default | +===================+=======================================================================+=============+=============+ -| do_tiling | Whether to use tiling for particles. Should be on when using OpenMP, | Bool | False | +| do_tiling | Whether to use tiling for particles. Should be on when using OpenMP, | Bool | false | | | and off when running on GPUs. | | | +-------------------+-----------------------------------------------------------------------+-------------+-------------+ | tile_size | If tiling is on, the maximum tile_size to in each direction | Ints | 1024000,8,8 | @@ -739,7 +739,7 @@ problems with particle IO, you could try varying some / all of these parameters. | datadigits_read | This for backwards compatibility, don't use unless you need to read | Int | 5 | | | and old (pre mid 2017) AMReX dataset. 
| | | +-------------------+-----------------------------------------------------------------------+-------------+-------------+ -| use_prepost | This is an optimization for large particle datasets that groups MPI | Bool | False | +| use_prepost | This is an optimization for large particle datasets that groups MPI | Bool | false | | | calls needed during the IO together. Try it seeing poor IO speeds | | | | | on large problems. | | | +-------------------+-----------------------------------------------------------------------+-------------+-------------+ From b95482d3b2ad6cab8318c867eb3b71b4f9ee11ac Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 29 Sep 2023 17:23:48 -0700 Subject: [PATCH 002/186] Free Disk Space in Github CIs (#3574) Add a new script that removes some packages we do not need. It can free up about 7 GB of space on Ubuntu. Use it in nvhpc and sycl/amd CIs. These CIs often run out of space. We don't want to use it in every CI, because it takes a few minutes to run the script. --- .github/workflows/cuda.yml | 1 + .../dependencies/ubuntu_free_disk_space.sh | 40 +++++++++++++++++++ .github/workflows/intel.yml | 1 + 3 files changed, 42 insertions(+) create mode 100755 .github/workflows/dependencies/ubuntu_free_disk_space.sh diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index b4abc1c0a05..3fe50a5a02d 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -117,6 +117,7 @@ jobs: - uses: actions/checkout@v3 - name: Dependencies run: | + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_nvhpc.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache diff --git a/.github/workflows/dependencies/ubuntu_free_disk_space.sh b/.github/workflows/dependencies/ubuntu_free_disk_space.sh new file mode 100755 index 00000000000..6b3e5b2f55e --- /dev/null +++ b/.github/workflows/dependencies/ubuntu_free_disk_space.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Copyright 2023 The AMReX Community +# +# License: BSD-3-Clause-LBNL + +# Don't want to use the following line because apt-get remove may fail if +# the package specfied does not exist. 
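# (That is, each apt-get remove below is allowed to fail on its own without
# aborting the whole script, which is why the strict-mode line stays commented out.)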
+# set -eu -o pipefail + +# Large packages +dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 + +echo 'Removing some packages we do not need' + +df -h + +apt list --installed + +sudo apt-get remove -y '^apache.*' +sudo apt-get remove -y '^aspnetcore.*' +sudo apt-get remove -y '^azure.*' +sudo apt-get remove -y '^dotnet.*' +sudo apt-get remove -y '^firebird.*' +sudo apt-get remove -y '^firefox.*' +sudo apt-get remove -y '^google.*' +sudo apt-get remove -y '^hhvm.*' +sudo apt-get remove -y '^microsoft.*' +sudo apt-get remove -y '^mongodb.*' +sudo apt-get remove -y '^mono-.*' +sudo apt-get remove -y '^monodoc-.*' +sudo apt-get remove -y '^mysql.*' +sudo apt-get remove -y '^php.*' +sudo apt-get remove -y '^powershell.*' +sudo apt-get remove -y '^snapd.*' +sudo apt-get remove -y '^temurin.*' + +sudo apt-get autoremove -y + +df -h diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 031f7d331bf..3d701c9adcd 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -152,6 +152,7 @@ jobs: - name: Dependencies if: ${{ env.CODEPLAYTOKEN != '' }} run: | + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_hip.sh 5.4.6 .github/workflows/dependencies/dependencies_dpcpp.sh .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }} From 388738dc788cc4a72c33a5d5d9940ef3a37a76b7 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 1 Oct 2023 11:03:12 -0700 Subject: [PATCH 003/186] Update CHANGES for 23.10 (#3576) --- CHANGES | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/CHANGES b/CHANGES index e27552e7cb4..ae56cdd5ffe 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,48 @@ +# 23.10 + + -- Bugfix typo in AMReX_SundialsIntegrator.H Nvar vs NVar, the + declared/used variable is NVar (#3573) + + -- Code Spell (#3563) + + -- Add Fortran interface for average_down_faces (#3553) + + -- PureSoA: Disable AoS Access (#3290) + + -- Another terrain fix for MPI (#3557) + Fix Increment, OK, and EnforcePeriodicWhere for terrain-fitted particles. (#3556) + + -- Added cvode functionality to SUNDIALS integrator (#3436) + + -- ParmParse::addfile needs Init (#3440) + + -- Make the same changes to ApplyInitialRedistribution as to ApplyMLRedistribution (#3554) + + -- Reset EB Fab Type (#3552) + EB Data outside domain (#3549) + + -- We weren't defining cent_hat out far enough (#3548) + + -- Add Fortran inteface for FillCoarsePatch for face variables (#3542) + + -- print_state/printCell: Make it work without managed memory (#3543) + + -- FillPatch Fortran Interface: Fix incorrect size of Vector (#3546) + + -- ReduceOps: reset result readiness flag (#3545) + + -- Fix Fortran interface for FillPatch for face variables (#3541) + + -- Support multiple CUDA architectures at compilation (#3535) + + -- Add Kestrel machine and remove Rhodes machine from Make.nrel (#3533) + + -- Explicitly flush when writing to the terse run log (#3532) + + -- Missing header in AMReX_GpuComplex.H (#3531) + + -- Add global domain id offset to conduit wrapper (#3524) + # 23.09 -- Fix InitRandomPerBox for 1D & 2D (#3527) From 65e7a1ce5e959668542af694b6e4d838ee95d15a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 1 Oct 2023 11:28:49 -0700 Subject: [PATCH 004/186] Fix Boundary Centroid in a Corner Case in 2D (#3568) Due to roundoff errors, It's possible to have a cell with area fraction of 1-epsilon, 1, 1, and 1 at its four faces. 
In this corner case, the previous way of computing boundary centroid was not correct. In this PR, it's fixed by using the level set. --- Src/EB/AMReX_EB2_2D_C.cpp | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp index b77b2ebd00e..b2bbde200c5 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -9,7 +9,7 @@ void set_eb_data (const int i, const int j, GpuArray const& dx, Array4 const& vfrac, Array4 const& vcent, Array4 const& barea, Array4 const& bcent, - Array4 const& bnorm) noexcept + Array4 const& bnorm, Array4 const& levset) noexcept { #ifdef AMREX_USE_FLOAT constexpr Real almostone = 1.0_rt-1.e-6_rt; @@ -37,7 +37,20 @@ void set_eb_data (const int i, const int j, const Real nyabs = std::abs(ny); Real x_ym, x_yp, y_xm, y_xp; - if (nx > 0.0_rt) { + if (nx == 0.0_rt) { + if (apx(i,j,0) == 1.0_rt && apx(i+1,j,0) == 1.0_rt) { + if (levset(i,j,0) > 0.0_rt || levset(i,j+1,0) > 0.0_rt) { + x_ym = 0.5_rt*dx[0] - aym; + x_yp = 0.5_rt*dx[0] - ayp; + } else { + x_ym = -0.5_rt*dx[0] + aym; + x_yp = -0.5_rt*dx[0] + ayp; + } + } else { + x_ym = 0.0_rt; + x_yp = 0.0_rt; + } + } else if (nx > 0.0_rt) { x_ym = -0.5_rt*dx[0] + aym; x_yp = -0.5_rt*dx[0] + ayp; } else { @@ -45,7 +58,20 @@ void set_eb_data (const int i, const int j, x_yp = 0.5_rt*dx[0] - ayp; } - if (ny > 0.0_rt) { + if (ny == 0.0_rt) { + if (apy(i,j,0) == 1.0_rt && apy(i,j+1,0) == 1.0_rt) { + if (levset(i,j,0) > 0.0_rt || levset(i+1,j,0) > 0.0_rt) { + y_xm = 0.5_rt*dx[1] - axm; + y_xp = 0.5_rt*dx[1] - axp; + } else { + y_xm = -0.5_rt*dx[1] + axm; + y_xp = -0.5_rt*dx[1] + axp; + } + } else { + y_xm = 0.0_rt; + y_xp = 0.0_rt; + } + } else if (ny > 0.0_rt) { y_xm = -0.5_rt*dx[1] + axm; y_xp = -0.5_rt*dx[1] + axp; } else { @@ -135,7 +161,8 @@ bool set_eb_cell (int i, int j, Array4 const& cell, GpuArray const& dx, Array4 const& vfrac, Array4 const& vcent, Array4 const& barea, Array4 const& bcent, - Array4 const& bnorm, Real small_volfrac) noexcept + Array4 const& bnorm, Array4 const& levset, + Real small_volfrac) noexcept { bool is_small_cell = false; if (cell(i,j,0).isRegular()) { @@ -157,7 +184,7 @@ bool set_eb_cell (int i, int j, Array4 const& cell, bnorm(i,j,0,0) = 0.0_rt; bnorm(i,j,0,1) = 0.0_rt; } else { - set_eb_data(i,j,apx,apy,dx,vfrac,vcent,barea,bcent,bnorm); + set_eb_data(i,j,apx,apy,dx,vfrac,vcent,barea,bcent,bnorm,levset); // remove small cells if (vfrac(i,j,0) < small_volfrac) { set_covered(i,j,cell,vfrac,vcent,barea,bcent,bnorm); @@ -341,7 +368,7 @@ void build_cells (Box const& bx, Array4 const& cell, { amrex::ignore_unused(k); bool is_small = set_eb_cell(i, j, cell, apx, apy, dx, vfrac, vcent, barea, bcent, - bnorm, small_volfrac); + bnorm, levset, small_volfrac); if (is_small) { Gpu::Atomic::Add(dp, 1); } From 703a2041f758978bcb4894932265ab2f22812247 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 1 Oct 2023 11:30:06 -0700 Subject: [PATCH 005/186] MLNodeABecLaplacian (#3559) Add a new Linear Opearator class MLNodeABecLaplacian. It solves `(alpha * a - beta * (del dot b grad)) phi = rhs`, where a, phi and rhs are nodal MultiFabs and b is cell-centered. It works in 2D and 3D, CPU and GPU. So far it works only for single level. There is an example of using it at Tests/LinearSolvers/ABecLaplacian_C as prob_type=4. The test problem converges at the second order. 
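For reference, the essential call sequence used in that test boils down to the sketch below. It is condensed from the new `Tests/LinearSolvers/ABecLaplacian_C` driver and is not a drop-in program: the function name and the pre-built `geom`/`grids`/`dmap`/MultiFab arguments are placeholders supplied by the caller, with `solution`, `rhs`, and `acoef` living on nodes and `bcoef` cell-centered.

```c++
// Sketch of a single-level solve of (alpha*a - beta*(del dot b grad)) phi = rhs.
#include <AMReX_MLNodeABecLaplacian.H>
#include <AMReX_MLMG.H>

void solve_node_abeclap (amrex::Geometry const& geom,
                         amrex::BoxArray const& grids,          // cell-centered
                         amrex::DistributionMapping const& dmap,
                         amrex::MultiFab& solution,             // nodal
                         amrex::MultiFab const& rhs,            // nodal
                         amrex::MultiFab const& acoef,          // nodal
                         amrex::MultiFab const& bcoef,          // cell-centered
                         amrex::Real ascalar, amrex::Real bscalar)
{
    using namespace amrex;

    MLNodeABecLaplacian mlndabec({geom}, {grids}, {dmap}, LPInfo());

    // Physical BCs on the low/high domain faces, as in the test problem.
    mlndabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet,
                                       LinOpBCType::Neumann,
                                       LinOpBCType::Dirichlet)},
                         {AMREX_D_DECL(LinOpBCType::Neumann,
                                       LinOpBCType::Dirichlet,
                                       LinOpBCType::Dirichlet)});

    mlndabec.setScalars(ascalar, bscalar); // alpha and beta
    mlndabec.setACoeffs(0, acoef);         // a Real overload also exists
    mlndabec.setBCoeffs(0, bcoef);         // a Real overload also exists

    MLMG mlmg(mlndabec);
    mlmg.setVerbose(1);
    mlmg.solve({&solution}, {&rhs}, Real(1.e-10), Real(0.0));
}
```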
--------- Co-authored-by: Andy Nonaka --- Src/LinearSolvers/CMakeLists.txt | 4 + .../MLMG/AMReX_MLNodeABecLap_1D_K.H | 30 ++ .../MLMG/AMReX_MLNodeABecLap_2D_K.H | 67 ++++ .../MLMG/AMReX_MLNodeABecLap_3D_K.H | 93 +++++ .../MLMG/AMReX_MLNodeABecLap_K.H | 13 + .../MLMG/AMReX_MLNodeABecLaplacian.H | 82 +++++ .../MLMG/AMReX_MLNodeABecLaplacian.cpp | 334 ++++++++++++++++++ .../MLMG/AMReX_MLNodeLaplacian.H | 4 - Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H | 4 + Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp | 2 +- Src/LinearSolvers/MLMG/Make.package | 3 + Tests/LinearSolvers/ABecLaplacian_C/MyTest.H | 6 +- .../LinearSolvers/ABecLaplacian_C/MyTest.cpp | 70 +++- .../ABecLaplacian_C/MyTestPlotfile.cpp | 13 + .../ABecLaplacian_C/initProb.cpp | 37 ++ .../ABecLaplacian_C/initProb_K.H | 70 ++++ .../LinearSolvers/ABecLaplacian_C/inputs-node | 16 + 17 files changed, 836 insertions(+), 12 deletions(-) create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp create mode 100644 Tests/LinearSolvers/ABecLaplacian_C/inputs-node diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index 500716be3a0..c2851d49959 100644 --- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -49,6 +49,10 @@ foreach(D IN LISTS AMReX_SPACEDIM) MLMG/AMReX_MLEBNodeFDLaplacian.cpp MLMG/AMReX_MLEBNodeFDLap_K.H MLMG/AMReX_MLEBNodeFDLap_${D}D_K.H + MLMG/AMReX_MLNodeABecLaplacian.H + MLMG/AMReX_MLNodeABecLaplacian.cpp + MLMG/AMReX_MLNodeABecLap_K.H + MLMG/AMReX_MLNodeABecLap_${D}D_K.H ) if (D EQUAL 3) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H new file mode 100644 index 00000000000..34a2ddda6f7 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H @@ -0,0 +1,30 @@ +#ifndef AMREX_MLNODEABECLAP_1D_K_H_ +#define AMREX_MLNODEABECLAP_1D_K_H_ + +namespace amrex { + +inline void +mlndabeclap_gauss_seidel_aa (Box const& /*bx*/, Array4 const& /*sol*/, + Array4 const& /*rhs*/, + Real /*alpha*/, Real /*beta*/, + Array4 const& /*acf*/, + Array4 const& /*bcf*/, + Array4 const& /*msk*/, + GpuArray const& /*dxinv*/) noexcept +{} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +mlndabeclap_jacobi_aa (int /*i*/, int /*j*/, int /*k*/, + Array4 const& /*sol*/, + Real /*lap*/, + Array4 const& /*rhs*/, + Real /*alpha*/, Real /*beta*/, + Array4 const& /*acf*/, + Array4 const& /*bcf*/, + Array4 const& /*msk*/, + GpuArray const& /*dxinv*/) noexcept +{} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H new file mode 100644 index 00000000000..3418b19d279 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H @@ -0,0 +1,67 @@ +#ifndef AMREX_MLNODEABECLAP_2D_K_H_ +#define AMREX_MLNODEABECLAP_2D_K_H_ + +namespace amrex { + +inline void +mlndabeclap_gauss_seidel_aa (Box const& bx, Array4 const& sol, + Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; + Real fxy = facx + facy; + Real f2xmy = Real(2.0)*facx - 
facy; + Real fmx2y = Real(2.0)*facy - facx; + + amrex::Loop(bx, [=] (int i, int j, int k) noexcept + { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real s0 = (-Real(2.0))*fxy*(bcf(i-1,j-1,k)+bcf(i,j-1,k)+bcf(i-1,j,k)+bcf(i,j,k)); + Real lap = sol(i-1,j-1,k)*fxy*bcf(i-1,j-1,k) + + sol(i+1,j-1,k)*fxy*bcf(i ,j-1,k) + + sol(i-1,j+1,k)*fxy*bcf(i-1,j ,k) + + sol(i+1,j+1,k)*fxy*bcf(i ,j ,k) + + sol(i-1,j,k)*f2xmy*(bcf(i-1,j-1,k)+bcf(i-1,j,k)) + + sol(i+1,j,k)*f2xmy*(bcf(i ,j-1,k)+bcf(i ,j,k)) + + sol(i,j-1,k)*fmx2y*(bcf(i-1,j-1,k)+bcf(i,j-1,k)) + + sol(i,j+1,k)*fmx2y*(bcf(i-1,j ,k)+bcf(i,j ,k)) + + sol(i,j,k)*s0; + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += (rhs(i,j,k) - Ax) / (alpha*acf(i,j,k)-beta*s0); + } + }); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +mlndabeclap_jacobi_aa (int i, int j, int k, Array4 const& sol, + Real lap, Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]); + Real s0 = fac*(bcf(i-1,j-1,k)+bcf(i,j-1,k)+bcf(i-1,j,k)+bcf(i,j,k)); + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax) + / (alpha*acf(i,j,k)-beta*s0); + } + +} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H new file mode 100644 index 00000000000..5ddb93a958c --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H @@ -0,0 +1,93 @@ +#ifndef AMREX_MLNODEABECLAP_3D_K_H_ +#define AMREX_MLNODEABECLAP_3D_K_H_ + +namespace amrex { + +inline void +mlndabeclap_gauss_seidel_aa (Box const& bx, Array4 const& sol, + Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + Real facx = Real(1.0/36.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/36.0)*dxinv[1]*dxinv[1]; + Real facz = Real(1.0/36.0)*dxinv[2]*dxinv[2]; + Real fxyz = facx + facy + facz; + Real fmx2y2z = -facx + Real(2.0)*facy + Real(2.0)*facz; + Real f2xmy2z = Real(2.0)*facx - facy + Real(2.0)*facz; + Real f2x2ymz = Real(2.0)*facx + Real(2.0)*facy - facz; + Real f4xm2ym2z = Real(4.0)*facx - Real(2.0)*facy - Real(2.0)*facz; + Real fm2x4ym2z = -Real(2.0)*facx + Real(4.0)*facy - Real(2.0)*facz; + Real fm2xm2y4z = -Real(2.0)*facx - Real(2.0)*facy + Real(4.0)*facz; + + amrex::LoopOnCpu(bx, [=] (int i, int j, int k) noexcept + { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real s0 = Real(-4.0)*fxyz*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i,j,k-1) + +bcf(i-1,j-1,k )+bcf(i,j-1,k )+bcf(i-1,j,k )+bcf(i,j,k )); + Real lap = sol(i,j,k)*s0 + + fxyz*(sol(i-1,j-1,k-1)*bcf(i-1,j-1,k-1) + + sol(i+1,j-1,k-1)*bcf(i ,j-1,k-1) + + sol(i-1,j+1,k-1)*bcf(i-1,j ,k-1) + + sol(i+1,j+1,k-1)*bcf(i ,j ,k-1) + + sol(i-1,j-1,k+1)*bcf(i-1,j-1,k ) + + sol(i+1,j-1,k+1)*bcf(i ,j-1,k ) + + sol(i-1,j+1,k+1)*bcf(i-1,j ,k ) + + sol(i+1,j+1,k+1)*bcf(i ,j ,k )) + + fmx2y2z*(sol(i ,j-1,k-1)*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)) + + sol(i ,j+1,k-1)*(bcf(i-1,j ,k-1)+bcf(i,j ,k-1)) + + sol(i ,j-1,k+1)*(bcf(i-1,j-1,k )+bcf(i,j-1,k )) + + sol(i ,j+1,k+1)*(bcf(i-1,j ,k )+bcf(i,j ,k ))) + + f2xmy2z*(sol(i-1,j ,k-1)*(bcf(i-1,j-1,k-1)+bcf(i-1,j,k-1)) + + sol(i+1,j ,k-1)*(bcf(i ,j-1,k-1)+bcf(i ,j,k-1)) + + sol(i-1,j ,k+1)*(bcf(i-1,j-1,k )+bcf(i-1,j,k )) + + sol(i+1,j ,k+1)*(bcf(i ,j-1,k )+bcf(i ,j,k ))) + + 
f2x2ymz*(sol(i-1,j-1,k )*(bcf(i-1,j-1,k-1)+bcf(i-1,j-1,k)) + + sol(i+1,j-1,k )*(bcf(i ,j-1,k-1)+bcf(i ,j-1,k)) + + sol(i-1,j+1,k )*(bcf(i-1,j ,k-1)+bcf(i-1,j ,k)) + + sol(i+1,j+1,k )*(bcf(i ,j ,k-1)+bcf(i ,j ,k))) + + f4xm2ym2z*(sol(i-1,j,k)*(bcf(i-1,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i-1,j-1,k)+bcf(i-1,j,k)) + + sol(i+1,j,k)*(bcf(i ,j-1,k-1)+bcf(i ,j,k-1)+bcf(i ,j-1,k)+bcf(i ,j,k))) + + fm2x4ym2z*(sol(i,j-1,k)*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j-1,k)+bcf(i,j-1,k)) + + sol(i,j+1,k)*(bcf(i-1,j ,k-1)+bcf(i,j ,k-1)+bcf(i-1,j ,k)+bcf(i,j ,k))) + + fm2xm2y4z*(sol(i,j,k-1)*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i,j,k-1)) + + sol(i,j,k+1)*(bcf(i-1,j-1,k )+bcf(i,j-1,k )+bcf(i-1,j,k )+bcf(i,j,k ))); + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += (rhs(i,j,k) - Ax) / (alpha*acf(i,j,k)-beta*s0); + } + }); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +mlndabeclap_jacobi_aa (int i, int j, int k, Array4 const& sol, + Real lap, Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real fxyz = Real(-4.0 / 36.0)*(dxinv[0]*dxinv[0] + + dxinv[1]*dxinv[1] + + dxinv[2]*dxinv[2]); + Real s0 = fxyz*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i,j,k-1) + +bcf(i-1,j-1,k )+bcf(i,j-1,k )+bcf(i-1,j,k )+bcf(i,j,k)); + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax) + / (alpha*acf(i,j,k)-beta*s0); + } +} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H new file mode 100644 index 00000000000..fd744bacd1e --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H @@ -0,0 +1,13 @@ +#ifndef AMREX_MLNODEABECLAP_K_H_ +#define AMREX_MLNODEABECLAP_K_H_ +#include + +#if (AMREX_SPACEDIM == 1) +#include +#elif (AMREX_SPACEDIM == 2) +#include +#else +#include +#endif + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H new file mode 100644 index 00000000000..b261c5bb3f2 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H @@ -0,0 +1,82 @@ +#ifndef AMREX_MLNODEABECLAPLACIAN_H_ +#define AMREX_MLNODEABECLAPLACIAN_H_ +#include + +#include + +namespace amrex { + +// (alpha * a - beta * (del dot b grad)) phi = rhs +// a, phi and rhs are nodal. b is cell-centered. 
+ +class MLNodeABecLaplacian + : public MLNodeLinOp +{ +public: + + MLNodeABecLaplacian () = default; + MLNodeABecLaplacian (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo(), + const Vector const*>& a_factory = {}); + ~MLNodeABecLaplacian () override = default; + + MLNodeABecLaplacian (const MLNodeABecLaplacian&) = delete; + MLNodeABecLaplacian (MLNodeABecLaplacian&&) = delete; + MLNodeABecLaplacian& operator= (const MLNodeABecLaplacian&) = delete; + MLNodeABecLaplacian& operator= (MLNodeABecLaplacian&&) = delete; + + void define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo(), + const Vector const*>& a_factory = {}); + + std::string name () const override { return std::string("MLNodeABecLaplacian"); } + + void setScalars (Real a, Real b) { + m_a_scalar = a; + m_b_scalar = b; + } + + void setACoeffs (int amrlev, Real a_acoef); + void setACoeffs (int amrlev, const MultiFab& a_acoef); + + void setBCoeffs (int amrlev, Real a_bcoef); + void setBCoeffs (int amrlev, const MultiFab& a_bcoef); + + void Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const final; + void Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs) const final; + + void fixUpResidualMask (int amrlev, iMultiFab& resmsk) final; + + bool isSingular (int /*amrlev*/) const final { return false; } + bool isBottomSingular () const final { return false; } + + void restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const final; + void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const final; + void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, + const MultiFab& fine_sol, const MultiFab& fine_rhs) final; + + void reflux (int crse_amrlev, + MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, + MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const final; + + void prepareForSolve () final; + + void averageDownCoeffs (); + void averageDownCoeffsToCoarseAmrLevel (int flev); + void averageDownCoeffsSameAmrLevel (int amrlev); + +private: + + Real m_a_scalar = std::numeric_limits::quiet_NaN(); + Real m_b_scalar = std::numeric_limits::quiet_NaN(); + Vector > m_a_coeffs; + Vector > m_b_coeffs; +}; + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp new file mode 100644 index 00000000000..c58fb0e6da9 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp @@ -0,0 +1,334 @@ +#include +#include +#include + +namespace amrex { + +MLNodeABecLaplacian::MLNodeABecLaplacian (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info, + const Vector const*>& a_factory) +{ + define(a_geom, a_grids, a_dmap, a_info, a_factory); +} + +void +MLNodeABecLaplacian::define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info, + const Vector const*>& a_factory) +{ +#ifdef AMREX_USE_EB + amrex::Abort("MLNodeABecLaplacian does not support EB"); +#endif + + BL_PROFILE("MLNodeABecLaplacian::define()"); + + // This makes sure grids are cell-centered; + Vector cc_grids = a_grids; + for (auto& ba : cc_grids) { + ba.enclosedCells(); + } + + MLNodeLinOp::define(a_geom, cc_grids, a_dmap, a_info, a_factory); + + const int ncomp = getNComp(); + + m_a_coeffs.resize(m_num_amr_levels); + m_b_coeffs.resize(m_num_amr_levels); + for (int amrlev = 0; 
amrlev < m_num_amr_levels; ++amrlev) { + m_a_coeffs[amrlev].resize(m_num_mg_levels[amrlev]); + m_b_coeffs[amrlev].resize(m_num_mg_levels[amrlev]); + for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { + m_a_coeffs[amrlev][mglev].define + (amrex::convert(m_grids[amrlev][mglev], IntVect::TheNodeVector()), + m_dmap[amrlev][mglev], ncomp, 0); + m_b_coeffs[amrlev][mglev].define + (m_grids[amrlev][mglev], m_dmap[amrlev][mglev], ncomp, 1); + } + } +} + +void +MLNodeABecLaplacian::setACoeffs (int amrlev, Real a_acoef) +{ + m_a_coeffs[amrlev][0].setVal(a_acoef); +} + +void +MLNodeABecLaplacian::setACoeffs (int amrlev, const MultiFab& a_acoef) +{ + const int ncomp = getNComp(); + m_a_coeffs[amrlev][0].LocalCopy(a_acoef, 0, 0, ncomp, IntVect(0)); +} + +void +MLNodeABecLaplacian::setBCoeffs (int amrlev, Real a_bcoef) +{ + m_b_coeffs[amrlev][0].setVal(a_bcoef); +} + +void +MLNodeABecLaplacian::setBCoeffs (int amrlev, const MultiFab& a_bcoef) +{ + const int ncomp = getNComp(); + m_b_coeffs[amrlev][0].LocalCopy(a_bcoef, 0, 0, ncomp, IntVect(0)); +} + +void +MLNodeABecLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const +{ + BL_PROFILE("MLNodeLaplacian::Fapply()"); + + AMREX_ALWAYS_ASSERT(getNComp() == 1); + + auto const alpha = m_a_scalar; + auto const beta = m_b_scalar; + const auto dxinvarr = m_geom[amrlev][mglev].InvCellSizeArray(); + + auto const& acoef_ma = m_a_coeffs[amrlev][mglev].const_arrays(); + auto const& bcoef_ma = m_b_coeffs[amrlev][mglev].const_arrays(); + auto const& dmskarr_ma = m_dirichlet_mask[amrlev][mglev]->const_arrays(); + + auto const& xarr_ma = in.const_arrays(); + auto const& yarr_ma = out.arrays(); + + ParallelFor(out, [=] AMREX_GPU_DEVICE(int box_no, int i, int j, int k) noexcept + { + auto lap = mlndlap_adotx_aa(i,j,k,xarr_ma[box_no],bcoef_ma[box_no],dmskarr_ma[box_no], +#if (AMREX_SPACEDIM == 2) + false, +#endif + dxinvarr); + yarr_ma[box_no](i,j,k) = (dmskarr_ma[box_no](i,j,k)) ? 
Real(0.0) + : alpha*acoef_ma[box_no](i,j,k)*xarr_ma[box_no](i,j,k) - beta*lap; + }); + Gpu::streamSynchronize(); +} + +void +MLNodeABecLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs) const +{ + BL_PROFILE("MLNodeABecLaplacian::Fsmooth()"); + + auto const alpha = m_a_scalar; + auto const beta = m_b_scalar; + const auto dxinvarr = m_geom[amrlev][mglev].InvCellSizeArray(); + + auto const& acoef = m_a_coeffs[amrlev][mglev]; + auto const& bcoef = m_b_coeffs[amrlev][mglev]; + auto const& dmsk = *(m_dirichlet_mask[amrlev][mglev]); + +#ifdef AMREX_USE_GPU + + auto const& acoef_ma = acoef.const_arrays(); + auto const& bcoef_ma = bcoef.const_arrays(); + auto const& dmskarr_ma = dmsk.const_arrays(); + auto const& solarr_ma = sol.arrays(); + auto const& rhsarr_ma = rhs.const_arrays(); + + for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) { + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + auto lap = mlndlap_adotx_aa(i,j,k,solarr_ma[box_no],bcoef_ma[box_no],dmskarr_ma[box_no], +#if (AMREX_SPACEDIM == 2) + false, +#endif + dxinvarr); + mlndabeclap_jacobi_aa(i,j,k, solarr_ma[box_no], lap, rhsarr_ma[box_no], alpha, beta, + acoef_ma[box_no], bcoef_ma[box_no], + dmskarr_ma[box_no], dxinvarr); + }); + Gpu::streamSynchronize(); + if (m_smooth_num_sweeps > 1) { nodalSync(amrlev, mglev, sol); } + } +#else + +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(sol); mfi.isValid(); ++mfi) { + const Box& bx = mfi.validbox(); + Array4 const& aarr = acoef.array(mfi); + Array4 const& barr = bcoef.array(mfi); + Array4 const& solarr = sol.array(mfi); + Array4 const& rhsarr = rhs.const_array(mfi); + Array4 const& dmskarr = dmsk.const_array(mfi); + for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) { + mlndabeclap_gauss_seidel_aa(bx, solarr, rhsarr, alpha, beta, + aarr, barr, dmskarr, dxinvarr); + } + } + nodalSync(amrlev, mglev, sol); +#endif +} + +void +MLNodeABecLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const +{ + BL_PROFILE("MLNodeABecLaplacian::restriction()"); + + applyBC(amrlev, cmglev-1, fine, BCMode::Homogeneous, StateMode::Solution); + + bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine); + MultiFab cfine; + if (need_parallel_copy) { + const BoxArray& ba = amrex::coarsen(fine.boxArray(), 2); + cfine.define(ba, fine.DistributionMap(), 1, 0); + } + + MultiFab* pcrse = (need_parallel_copy) ? 
&cfine : &crse; + + auto pcrse_ma = pcrse->arrays(); + auto fine_ma = fine.const_arrays(); + auto msk_ma = m_dirichlet_mask[amrlev][cmglev-1]->const_arrays(); + + ParallelFor(*pcrse, [=] AMREX_GPU_DEVICE(int box_no, int i, int j, int k) noexcept + { + mlndlap_restriction(i,j,k,pcrse_ma[box_no],fine_ma[box_no],msk_ma[box_no]); + }); + Gpu::streamSynchronize(); + + if (need_parallel_copy) { + crse.ParallelCopy(cfine); + } +} + +void +MLNodeABecLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const +{ + BL_PROFILE("MLNodeABecLaplacian::interpolation()"); + + bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine); + MultiFab cfine; + const MultiFab* cmf = &crse; + if (need_parallel_copy) { + const BoxArray& ba = amrex::coarsen(fine.boxArray(), 2); + cfine.define(ba, fine.DistributionMap(), 1, 0); + cfine.ParallelCopy(crse); + cmf = &cfine; + } + + auto const& fine_ma = fine.arrays(); + auto const& crse_ma = cmf->const_arrays(); + auto const& msk_ma = m_dirichlet_mask[amrlev][fmglev]->const_arrays(); + auto const& sig_ma = m_b_coeffs[amrlev][fmglev].const_arrays(); + + ParallelFor(fine, [=] AMREX_GPU_DEVICE(int box_no, int i, int j, int k) noexcept + { + mlndlap_interpadd_aa(i, j, k, fine_ma[box_no], crse_ma[box_no], + sig_ma[box_no], msk_ma[box_no]); + }); + Gpu::streamSynchronize(); +} + +void +MLNodeABecLaplacian::averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, + const MultiFab& fine_sol, const MultiFab& fine_rhs) +{ + amrex::ignore_unused(camrlev,crse_sol,crse_rhs,fine_sol,fine_rhs); + amrex::Abort("MLNodeABecLaplacian::averageDownSolutionRHS TODO"); +} + +void +MLNodeABecLaplacian::reflux (int crse_amrlev, + MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, + MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const +{ + amrex::ignore_unused(crse_amrlev,res,crse_sol,crse_rhs,fine_res,fine_sol,fine_rhs); + amrex::Abort("MLNodeABecLaplacian::reflux TODO"); +} + +void +MLNodeABecLaplacian::prepareForSolve () +{ + BL_PROFILE("MLNodeABecLaplacian::prepareForSolve()"); + + MLNodeLinOp::prepareForSolve(); + + buildMasks(); + + averageDownCoeffs(); +} + +void +MLNodeABecLaplacian::fixUpResidualMask (int amrlev, iMultiFab& resmsk) +{ + if (!m_masks_built) { buildMasks(); } + + auto const& fmsk = m_nd_fine_mask[amrlev]->const_arrays(); + auto const& rmsk = resmsk.arrays(); + + amrex::ParallelFor(resmsk, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + { + if (fmsk[bno](i,j,k) == crse_fine_node) { rmsk[bno](i,j,k) = 1; } + }); + Gpu::streamSynchronize(); +} + +void +MLNodeABecLaplacian::averageDownCoeffs () +{ + BL_PROFILE("MLNodeABecLaplacian::averageDownCoeffs()"); + + for (int amrlev = m_num_amr_levels-1; amrlev > 0; --amrlev) { + averageDownCoeffsSameAmrLevel(amrlev); + averageDownCoeffsToCoarseAmrLevel(amrlev); + } + + averageDownCoeffsSameAmrLevel(0); + + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { + m_b_coeffs[amrlev][mglev].FillBoundary(m_geom[amrlev][mglev].periodicity()); + + const Box& domain = m_geom[amrlev][mglev].Domain(); + const auto lobc = LoBC(); + const auto hibc = HiBC(); + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) { mfi_info.SetDynamic(true); } +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_b_coeffs[amrlev][mglev], mfi_info); mfi.isValid(); ++mfi) + { + Array4 const& sfab = m_b_coeffs[amrlev][mglev].array(mfi); + 
mlndlap_fillbc_cc(mfi.validbox(),sfab,domain,lobc,hibc); + } + } + } +} + +void +MLNodeABecLaplacian::averageDownCoeffsToCoarseAmrLevel (int flev) +{ + const int mglev = 0; + const int ncomp = getNComp(); + // xxxxx TODO: There is a potential issue of the coarse data not consistent + // across periodic boundaries. + amrex::average_down_nodal(m_a_coeffs[flev ][mglev], + m_a_coeffs[flev-1][mglev], + IntVect(m_amr_ref_ratio[flev-1])); + amrex::average_down(m_b_coeffs[flev ][mglev], + m_b_coeffs[flev-1][mglev], 0, ncomp, + m_amr_ref_ratio[flev-1]); +} + +void +MLNodeABecLaplacian::averageDownCoeffsSameAmrLevel (int amrlev) +{ + const int ncomp = getNComp(); + for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { + IntVect ratio(mg_coarsen_ratio); + amrex::average_down_nodal(m_a_coeffs[amrlev][mglev-1], + m_a_coeffs[amrlev][mglev ], ratio); + amrex::average_down(m_b_coeffs[amrlev][mglev-1], + m_b_coeffs[amrlev][mglev ], 0, ncomp, ratio); + } +} + +} diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H index 7ec9f13ce73..adbf00da231 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H @@ -84,10 +84,6 @@ public : if (m_const_sigma == Real(0.0)) { m_coarsening_strategy = cs; } } - void setSmoothNumSweeps (int nsweeps) noexcept { - m_smooth_num_sweeps = nsweeps; - } - BottomSolver getDefaultBottomSolver () const final { return (m_coarsening_strategy == CoarseningStrategy::RAP) ? BottomSolver::bicgcg : BottomSolver::bicgstab; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index 3c36989f79a..424d22f60c3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -33,6 +33,10 @@ public: const Vector const*>& a_factory = {}, int a_eb_limit_coarsening = -1); + void setSmoothNumSweeps (int nsweeps) noexcept { + m_smooth_num_sweeps = nsweeps; + } + void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, const MultiFab* = nullptr, const MultiFab* = nullptr, const MultiFab* = nullptr) final {} diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index 5e2389c1f93..0fb9e2ba33b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -368,7 +368,7 @@ MLNodeLinOp::buildMasks () MLNodeLinOp_set_dot_mask(m_bottom_dot_mask, omask, geom, lobc, hibc, m_coarsening_strategy); } - if (m_is_bottom_singular) + if (isBottomSingular()) { int amrlev = 0; int mglev = 0; diff --git a/Src/LinearSolvers/MLMG/Make.package b/Src/LinearSolvers/MLMG/Make.package index 22934a0d045..d66d64ec0eb 100644 --- a/Src/LinearSolvers/MLMG/Make.package +++ b/Src/LinearSolvers/MLMG/Make.package @@ -50,6 +50,9 @@ ifeq ($(USE_HYPRE),TRUE) CEXE_sources += AMReX_MLNodeLaplacian_hypre.cpp endif +CEXE_headers += AMReX_MLNodeABecLaplacian.H +CEXE_sources += AMReX_MLNodeABecLaplacian.cpp +CEXE_headers += AMReX_MLNodeABecLap_K.H AMReX_MLNodeABecLap_$(DIM)D_K.H CEXE_headers += AMReX_MLNodeTensorLaplacian.H CEXE_sources += AMReX_MLNodeTensorLaplacian.cpp diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H index afe97c9bea6..c1ed7ba4c3d 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H @@ -20,6 +20,7 @@ public: void initProbPoisson (); void initProbABecLaplacian (); void initProbABecLaplacianInhomNeumann 
(); + void initProbNodeABecLaplacian (); private: @@ -28,6 +29,7 @@ private: void solvePoisson (); void solveABecLaplacian (); void solveABecLaplacianInhomNeumann (); + void solveNodeABecLaplacian (); int max_level = 1; int ref_ratio = 2; @@ -36,7 +38,9 @@ private: bool composite_solve = true; - int prob_type = 1; // 1. Poisson, 2. ABecLaplacian + // 1. Poisson, 2. ABecLaplacian, 3. ABecLaplacian w/ inhomNeumann + // 4. NodeABecLaplacian + int prob_type = 1; // For MLMG solver int verbose = 2; diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp index 0db9f5959d8..9900ce43d7f 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp @@ -1,5 +1,6 @@ #include "MyTest.H" +#include #include #include #include @@ -22,6 +23,8 @@ MyTest::solve () solveABecLaplacian(); } else if (prob_type == 3) { solveABecLaplacianInhomNeumann(); + } else if (prob_type == 4) { + solveNodeABecLaplacian(); } else { amrex::Abort("Unknown prob_type"); } @@ -409,6 +412,54 @@ MyTest::solveABecLaplacianInhomNeumann () } } +void +MyTest::solveNodeABecLaplacian () +{ + LPInfo info; + info.setAgglomeration(agglomeration); + info.setConsolidation(consolidation); + info.setMaxCoarseningLevel(max_coarsening_level); + + const auto tol_rel = Real(1.e-10); + const auto tol_abs = Real(0.0); + + const auto nlevels = static_cast(geom.size()); + + if (composite_solve && nlevels > 1) + { + amrex::Abort("solveNodeABecLaplacian: TODO composite_solve"); + } + else + { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nlevels == 1, "solveNodeABecLaplacian: nlevels > 1 TODO"); + for (int ilev = 0; ilev < nlevels; ++ilev) + { + MLNodeABecLaplacian mlndabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, + info); + + mlndabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Neumann, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Neumann, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + + mlndabec.setScalars(ascalar, bscalar); + + mlndabec.setACoeffs(0, acoef[ilev]); + mlndabec.setBCoeffs(0, bcoef[ilev]); + + MLMG mlmg(mlndabec); + mlmg.setMaxIter(max_iter); + mlmg.setMaxFmgIter(max_fmg_iter); + mlmg.setVerbose(verbose); + mlmg.setBottomVerbose(bottom_verbose); + + mlmg.solve({&solution[ilev]}, {&rhs[ilev]}, tol_rel, tol_abs); + } + } +} + void MyTest::readParameters () { @@ -463,7 +514,7 @@ MyTest::initData () rhs.resize(nlevels); exact_solution.resize(nlevels); - if (prob_type == 2 || prob_type == 3) { + if (prob_type == 2 || prob_type == 3 || prob_type == 4) { acoef.resize(nlevels); bcoef.resize(nlevels); } @@ -491,12 +542,17 @@ MyTest::initData () for (int ilev = 0; ilev < nlevels; ++ilev) { dmap[ilev].define(grids[ilev]); - solution [ilev].define(grids[ilev], dmap[ilev], 1, 1); - rhs [ilev].define(grids[ilev], dmap[ilev], 1, 0); - exact_solution[ilev].define(grids[ilev], dmap[ilev], 1, 0); + BoxArray ba = grids[ilev]; + if (prob_type == 4) { + ba.surroundingNodes(); + } + solution [ilev].define(ba, dmap[ilev], 1, 1); + rhs [ilev].define(ba, dmap[ilev], 1, 0); + exact_solution[ilev].define(ba, dmap[ilev], 1, 0); if (!acoef.empty()) { - acoef[ilev].define(grids[ilev], dmap[ilev], 1, 0); - bcoef[ilev].define(grids[ilev], dmap[ilev], 1, 1); + acoef[ilev].define(ba , dmap[ilev], 1, 0); + const int ngb = (prob_type == 4) ? 
0 : 1; + bcoef[ilev].define(grids[ilev], dmap[ilev], 1, ngb); } } @@ -506,6 +562,8 @@ MyTest::initData () initProbABecLaplacian(); } else if (prob_type == 3) { initProbABecLaplacianInhomNeumann(); + } else if (prob_type == 4) { + initProbNodeABecLaplacian(); } else { amrex::Abort("Unknown prob_type "+std::to_string(prob_type)); } diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp index 707361a4e34..4473f978a85 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp @@ -8,6 +8,19 @@ using namespace amrex; void MyTest::writePlotfile () const { + if (prob_type == 4) { + for (int ilev = 0; ilev <= max_level; ++ilev) { + VisMF::Write(solution[ilev], "solution-lev"+std::to_string(ilev)); + MultiFab errmf(solution[ilev].boxArray(), + solution[ilev].DistributionMap(), 1, 1); + MultiFab::Copy(errmf, solution[ilev], 0, 0, 1, 0); + MultiFab::Subtract(errmf, exact_solution[ilev], 0, 0, 1, 0); + auto error = errmf.norminf(); + amrex::Print() << "Level " << ilev << " max-norm error: " << error << std::endl; + } + return; + } + ParmParse pp; bool gpu_regtest = false; #ifdef AMREX_USE_GPU diff --git a/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp b/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp index a7b197adbed..ec102ae0601 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp @@ -172,3 +172,40 @@ MyTest::initProbABecLaplacianInhomNeumann () solution[ilev].setVal(0.0,0,1,0); // set interior to 0 } } + +void +MyTest::initProbNodeABecLaplacian () +{ + for (int ilev = 0; ilev <= max_level; ++ilev) + { + solution[ilev].setVal(0.0); + + const auto prob_lo = geom[ilev].ProbLoArray(); + const auto prob_hi = geom[ilev].ProbHiArray(); + const auto dx = geom[ilev].CellSizeArray(); + auto a = ascalar; + auto b = bscalar; + Box const& nddom = amrex::surroundingNodes(geom[ilev].Domain()); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(rhs[ilev]); mfi.isValid(); ++mfi) + { + const Box& ndbx = mfi.validbox(); + + auto rhsfab = rhs[ilev].array(mfi); + auto exactfab = exact_solution[ilev].array(mfi); + auto solfab = solution[ilev].array(mfi); + auto acoeffab = acoef[ilev].array(mfi); + auto bcoeffab = bcoef[ilev].array(mfi); + + amrex::ParallelFor(ndbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + actual_init_nodeabeclap(i,j,k,rhsfab,exactfab,solfab, + acoeffab,bcoeffab,a,b, + nddom,prob_lo,prob_hi,dx); + }); + } + } +} diff --git a/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H b/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H index 4fcab046f0b..0b6fbe20f71 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H +++ b/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H @@ -310,4 +310,74 @@ void actual_init_dphi_dz_hi (int i, int j, int k, amrex::Array4 con + .25 * std::cos(fpi*x) * std::sin(fpi*y) * (-fpi) * std::sin(fpi*z); } +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void actual_init_nodeabeclap(int i, int j, int k, + amrex::Array4 const& rhs, + amrex::Array4 const& exact, + amrex::Array4 const& sol, + amrex::Array4 const& acoef, + amrex::Array4 const& bcoef, + amrex::Real a, amrex::Real b, + amrex::Box const& nddom, + amrex::GpuArray const& prob_lo, + amrex::GpuArray const& prob_hi, + amrex::GpuArray const& dx) +{ + constexpr amrex::Real w = 0.05; + constexpr amrex::Real sigma = 10.; + const amrex::Real theta = 0.5*std::log(3.) 
/ (w + 1.e-50); + + constexpr amrex::Real pi = 3.1415926535897932; + constexpr amrex::Real tpi = 2.*pi; + constexpr amrex::Real fpi = 4.*pi; + constexpr amrex::Real fac = static_cast(AMREX_SPACEDIM*4)*pi*pi; + + // bcoef is at cell center, whereas the rest at nodes. + if (bcoef.contains(i,j,k)) { + actual_init_bcoef(i,j,k, bcoef, prob_lo, prob_hi, dx); + } + + amrex::Real xc = (prob_hi[0] + prob_lo[0])*0.5; + amrex::Real yc = (prob_hi[1] + prob_lo[1])*0.5; +#if (AMREX_SPACEDIM == 2) + amrex::Real zc = 0.0; +#else + amrex::Real zc = (prob_hi[2] + prob_lo[2])*0.5; +#endif + + amrex::Real x = prob_lo[0] + dx[0] * (i); + amrex::Real y = prob_lo[1] + dx[1] * (j); +#if (AMREX_SPACEDIM == 2) + amrex::Real z = 0.0; +#else + amrex::Real z = prob_lo[2] + dx[2] * (k); +#endif + + amrex::Real r = std::sqrt((x-xc)*(x-xc) + (y-yc)*(y-yc) + (z-zc)*(z-zc)); + amrex::Real bcnd = (sigma-1.)/2.*std::tanh(theta*(r-0.25)) + (sigma+1.)/2.; + amrex::Real tmp = std::cosh(theta*(r-0.25)); + amrex::Real dbdrfac = (r == amrex::Real(0.0)) + ? amrex::Real(0.0) : (sigma-1.)/2./(tmp*tmp) * theta/r; + dbdrfac *= b; + + acoef(i,j,k) = 1.; + + exact(i,j,k) = std::cos(tpi*x) * std::cos(tpi*y) * std::cos(tpi*z) + + .25 * std::cos(fpi*x) * std::cos(fpi*y) * std::cos(fpi*z); + + rhs(i,j,k) = bcnd*b*fac*( std::cos(tpi*x) * std::cos(tpi*y) * std::cos(tpi*z) + + std::cos(fpi*x) * std::cos(fpi*y) * std::cos(fpi*z)) + + dbdrfac*((x-xc)*(tpi*std::sin(tpi*x) * std::cos(tpi*y) * std::cos(tpi*z) + + pi*std::sin(fpi*x) * std::cos(fpi*y) * std::cos(fpi*z)) + + (y-yc)*(tpi*std::cos(tpi*x) * std::sin(tpi*y) * std::cos(tpi*z) + + pi*std::cos(fpi*x) * std::sin(fpi*y) * std::cos(fpi*z)) + + (z-zc)*(tpi*std::cos(tpi*x) * std::cos(tpi*y) * std::sin(tpi*z) + + pi*std::cos(fpi*x) * std::cos(fpi*y) * std::sin(fpi*z))) + + a * exact(i,j,k); + + if (! nddom.strictly_contains(i,j,k)) { + sol(i,j,k) = exact(i,j,k); // domain boundary + } +} + #endif diff --git a/Tests/LinearSolvers/ABecLaplacian_C/inputs-node b/Tests/LinearSolvers/ABecLaplacian_C/inputs-node new file mode 100644 index 00000000000..2d96588d241 --- /dev/null +++ b/Tests/LinearSolvers/ABecLaplacian_C/inputs-node @@ -0,0 +1,16 @@ + +max_level = 0 +ref_ratio = 2 +n_cell = 128 +max_grid_size = 64 + +composite_solve = 0 # composite solve or level by level? + +prob_type = 4 # nodal ABecLaplacian + +# For MLMG +verbose = 2 +bottom_verbose = 0 +max_iter = 100 +agglomeration = 1 # Do agglomeration on AMR Level 0? +consolidation = 1 # Do consolidation? From 8515ea846f85656c26d842401af1b29958de7c17 Mon Sep 17 00:00:00 2001 From: Akash Dhruv Date: Sun, 1 Oct 2023 13:34:10 -0500 Subject: [PATCH 006/186] Adding bittree interface to improve regridding performance in octree mode (#3555) ## Summary This PR introduces dependency on [Bittree library](https://github.com/Flash-X/Bittree) to improve regridding performance in octree mode. ## Additional background Testing and development of this feature is done in sync with Flash-X and is recorded in this [reproducibility capsule](https://github.com/Lab-Notebooks/AMReX-Bittree-Performance). This PR is primarily created to consolidate development work and avoid creating multiple branches. At present using AMReX+Bittree improves regridding performance [by a factor of 2](https://github.com/Lab-Notebooks/AMReX-Bittree-Performance/blob/14faa2212c4e5dba7fd99a6526c6937414f9c109/analysis/Performance.ipynb) at > 20000 ranks. 
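To try it, the switches added by this PR are summarized below (a sketch of the GNU Make route; the install path is a placeholder, and `Src/Extern/Bittree/AMReX_Bittree.H` plus `Tools/GNUMake/packages/Make.bittree` carry the authoritative details):

```
# Build (GNU Make), e.g. in the application's GNUmakefile or Make.local:
USE_BITTREE  = TRUE
BITTREE_PATH = /path/to/bittree/installation

# Run time, in the ParmParse inputs file (octree mode):
amr.use_bittree = true
```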
We hope to improve performance further using a bittree-based distribution mapping and therefore adding a new function `AmrMesh::MakeDistributionMap` as a place-holder. Continuation of #2893 and #3547 ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Weiqun Zhang Co-authored-by: Tom Klosterman --- GNUmakefile.in | 3 + Src/AmrCore/AMReX_AmrCore.cpp | 4 +- Src/AmrCore/AMReX_AmrMesh.H | 11 + Src/AmrCore/AMReX_AmrMesh.cpp | 146 ++++++- Src/AmrCore/Make.package | 3 +- Src/Extern/Bittree/AMReX_Bittree.H | 75 ++++ Src/Extern/Bittree/AMReX_Bittree.cpp | 377 ++++++++++++++++++ Src/Extern/Bittree/CMakeLists.txt | 11 + Src/Extern/Bittree/Make.package | 5 + Tests/Amr/Advection_AmrCore/Exec/GNUmakefile | 1 + .../Amr/Advection_AmrCore/Exec/inputs_bittree | 81 ++++ Tools/GNUMake/Make.defs | 6 + Tools/GNUMake/packages/Make.bittree | 16 + Tools/libamrex/configure.py | 5 + 14 files changed, 737 insertions(+), 7 deletions(-) create mode 100644 Src/Extern/Bittree/AMReX_Bittree.H create mode 100644 Src/Extern/Bittree/AMReX_Bittree.cpp create mode 100644 Src/Extern/Bittree/CMakeLists.txt create mode 100644 Src/Extern/Bittree/Make.package create mode 100644 Tests/Amr/Advection_AmrCore/Exec/inputs_bittree create mode 100644 Tools/GNUMake/packages/Make.bittree diff --git a/GNUmakefile.in b/GNUmakefile.in index 21185a705f1..b85c2e0c35e 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -45,6 +45,9 @@ endif ifeq ($(USE_SUNDIALS),TRUE) Pdirs += Extern/SUNDIALS endif +ifeq ($(USE_BITTREE),TRUE) + Pdirs += Extern/Bittree +endif Ppack := $(foreach dir, $(Pdirs), $(AMREX_HOME)/Src/$(dir)/Make.package) include $(Ppack) diff --git a/Src/AmrCore/AMReX_AmrCore.cpp b/Src/AmrCore/AMReX_AmrCore.cpp index 502b3f5cb23..ed0cd5d1020 100644 --- a/Src/AmrCore/AMReX_AmrCore.cpp +++ b/Src/AmrCore/AMReX_AmrCore.cpp @@ -104,7 +104,7 @@ AmrCore::regrid (int lbase, Real time, bool) DistributionMapping level_dmap = dmap[lev]; if (ba_changed) { level_grids = new_grids[lev]; - level_dmap = DistributionMapping(level_grids); + level_dmap = MakeDistributionMap(lev, level_grids); } const auto old_num_setdm = num_setdm; RemakeLevel(lev, time, level_grids, level_dmap); @@ -117,7 +117,7 @@ AmrCore::regrid (int lbase, Real time, bool) } else // a new level { - DistributionMapping new_dmap(new_grids[lev]); + DistributionMapping new_dmap = MakeDistributionMap(lev, new_grids[lev]); const auto old_num_setdm = num_setdm; MakeNewLevelFromCoarse(lev, time, new_grids[lev], new_dmap); SetBoxArray(lev, new_grids[lev]); diff --git a/Src/AmrCore/AMReX_AmrMesh.H b/Src/AmrCore/AMReX_AmrMesh.H index a3c6fbc8f62..2cb17d720f3 100644 --- a/Src/AmrCore/AMReX_AmrMesh.H +++ b/Src/AmrCore/AMReX_AmrMesh.H @@ -11,6 +11,10 @@ #include #include +#ifdef AMREX_USE_BITTREE +#include +#endif + namespace amrex { struct AmrInfo { @@ -253,6 +257,8 @@ public: [[nodiscard]] long CountCells (int lev) noexcept; + [[nodiscard]] virtual DistributionMapping MakeDistributionMap (int lev, BoxArray const& ba); + protected: int finest_level; //!< Current finest level. 
@@ -260,6 +266,11 @@ protected: Vector dmap; Vector grids; +#ifdef AMREX_USE_BITTREE + bool use_bittree = false; + std::unique_ptr btmesh; +#endif + unsigned int num_setdm = 0; unsigned int num_setba = 0; diff --git a/Src/AmrCore/AMReX_AmrMesh.cpp b/Src/AmrCore/AMReX_AmrMesh.cpp index 70a8df1dabf..efccf318315 100644 --- a/Src/AmrCore/AMReX_AmrMesh.cpp +++ b/Src/AmrCore/AMReX_AmrMesh.cpp @@ -5,6 +5,13 @@ #include #include #include +#include + +#ifdef AMREX_USE_BITTREE +#include +#endif + +#include namespace amrex { @@ -376,6 +383,10 @@ AmrMesh::InitAmrMesh (int max_level_in, const Vector& n_cell_in, finest_level = -1; +#ifdef AMREX_USE_BITTREE + pp.queryAdd("use_bittree",use_bittree); +#endif + if (check_input) { checkInput(); } } @@ -437,6 +448,26 @@ AmrMesh::LevelDefined (int lev) noexcept return lev <= max_level && !grids[lev].empty() && !dmap[lev].empty(); } +DistributionMapping +AmrMesh::MakeDistributionMap (int lev, BoxArray const& ba) +{ + + BL_PROFILE("AmrMesh::MakeDistributionMap()"); + + if (verbose) { + amrex::Print() << "Creating new distribution map on level: " << lev << "\n"; + } + +#ifdef AMREX_USE_BITTREE + // if (use_bittree) { + // return DistributionMapping(ba); + // } else +#endif + { + return DistributionMapping(ba); + } +} + void AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const { @@ -514,6 +545,10 @@ AmrMesh::MakeNewGrids (int lbase, Real time, int& new_finest, Vector& if (new_grids.size() < max_crse+2) { new_grids.resize(max_crse+2); } +#ifdef AMREX_USE_BITTREE + if(!use_bittree) { +#endif + // // Construct problem domain at each level. // @@ -774,6 +809,72 @@ AmrMesh::MakeNewGrids (int lbase, Real time, int& new_finest, Vector& } } } + +#ifdef AMREX_USE_BITTREE + } +#endif + +#ifdef AMREX_USE_BITTREE + // Bittree version + if(use_bittree) { + // Initialize BT refinement + btmesh->refine_init(); + + // ------------------------------------------------------------------- + // Use tagging data to mark BT for refinement, then use the new bitmap + // to calculate the new grids. + auto tree0 = btmesh->getTree(); + + // [1] Error Estimation and tagging + // btTags is indexed by bitid, Bittree's internal indexing scheme. + // For any id, btTags = 1 if should be parent, -1 if should not be parent (or not exist). + std::vector btTags(tree0->id_upper_bound(),0); + + for (int lev=max_crse; lev>=lbase; --lev) { + + TagBoxArray tags(grids[lev],dmap[lev], n_error_buf[lev]); + ErrorEst(lev, tags, time, 0); + tags.buffer(n_error_buf[lev]); + + for (MFIter mfi(tags); mfi.isValid(); ++mfi) { + auto const& tagbox = tags.const_array(mfi); + bool has_set_tags = amrex::Reduce::AnyOf(mfi.validbox(), + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + return tagbox(i,j,k)!=TagBox::CLEAR; + }); + + // Set the values of btTags. 
+ int bitid = btUnit::getBitid(btmesh.get(),false,lev,mfi.index()); + // TODO Check lev == tree0->block_level(bitid) + if(has_set_tags) { + btTags[bitid] = 1; + } + else { + btTags[bitid] = -1; + } + } + } + + // [2] btRefine - check for proper octree nesting and update bitmap + MPI_Comm comm = ParallelContext::CommunicatorSub(); + int changed = btUnit::btRefine(btmesh.get(), btTags, max_crse, lbase, grids, dmap, comm); + + // [3] btCalculateGrids - use new bitmap to generate new grids + if (changed>0) { + btUnit::btCalculateGrids(btmesh.get(),lbase,new_finest,new_grids,max_grid_size); + } else { + new_finest = finest_level; + for(int i=0; i<=finest_level; ++i) { + new_grids[i] = grids[i]; + } + } + + // Finalize BT refinement + btmesh->refine_apply(); + } +#endif + } void @@ -783,11 +884,48 @@ AmrMesh::MakeNewGrids (Real time) { finest_level = 0; - const BoxArray& ba = MakeBaseGrids(); - DistributionMapping dm(ba); + BoxArray ba; + DistributionMapping dm; const auto old_num_setdm = num_setdm; const auto old_num_setba = num_setba; +#ifdef AMREX_USE_BITTREE + if(!use_bittree) { +#endif + ba = MakeBaseGrids(); + dm = MakeDistributionMap(0, ba); + +#ifdef AMREX_USE_BITTREE + } + else { + //Initialize Bittree + + // top = number of grids on coarsest level in each direction + std::vector top(AMREX_SPACEDIM,0); + IntVect ncells = geom[0].Domain().length(); + for(int i=0; i includes(ngrids,1); + + btmesh = std::make_unique(top.data(),includes.data()); + + // Set BCs + for(int d=0; d +#include +#include +#include + +namespace amrex { + +/* +Include in Make.local: +BITTREE_PATH = /path/to/bittree/installation +INCLUDE_LOCATIONS += $(BITTREE_PATH)/include +LIBRARY_LOCATIONS += $(BITTREE_PATH)/lib +LIBRARIES += -lbittree + +Include in inputs: +amr.use_bittree = true +*/ + +class btUnit { + // Functions used in AmrMesh + public: + static int btRefine(bittree::BittreeAmr* const mesh, + std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, + MPI_Comm comm); + static void btCalculateGrids(bittree::BittreeAmr* const mesh, + int lbase, + int& new_finest, + Vector& new_grids, + Vector const& max_grid_size); + static void btCalculateLevel(bittree::BittreeAmr* const mesh, + int lev, + BoxArray& ba, + IntVect const& max_grid_size); + // Utils + public: + static int getBitid(bittree::BittreeAmr* const mesh, bool updated, + int lev, int idx_on_lev); + static int getIndex(bittree::BittreeAmr* const mesh, bool updated, + int lev, int bitid); + + // Functions to implement strict octree logic + private: + static void btCheckRefine(bittree::BittreeAmr* const mesh, + std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, + MPI_Comm comm); + + static void btCheckDerefine(bittree::BittreeAmr* const mesh, + std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, + MPI_Comm comm); + + // Utility Functions + static bool checkNeighborsRefine(bittree::BittreeAmr* const mesh, + bittree::MortonTree::Block b); + static std::vector neighIntCoords(bittree::BittreeAmr* const mesh, + unsigned lev, unsigned* lcoord, + int* gCell); + + public: + // Represents whether domain has periodic BC in each direction + // true = Periodic, false = Non-Periodic + static bool bcPeriodic[AMREX_SPACEDIM]; +}; + + +} +#endif diff --git a/Src/Extern/Bittree/AMReX_Bittree.cpp b/Src/Extern/Bittree/AMReX_Bittree.cpp new file mode 100644 index 00000000000..543b5688208 --- /dev/null +++ b/Src/Extern/Bittree/AMReX_Bittree.cpp @@ -0,0 +1,377 @@ +#include +#include +#include +#include + 
+using namespace bittree; + +namespace amrex { +static constexpr int K1D = unsigned(AMREX_SPACEDIM>=1); +static constexpr int K2D = unsigned(AMREX_SPACEDIM>=2); +static constexpr int K3D = unsigned(AMREX_SPACEDIM>=3); + + +bool btUnit::bcPeriodic[AMREX_SPACEDIM]; + +/* +NOTE: Bittree object is created in AmrMesh::MakeNewGrids (Real time) + with + `mesh = std::make_shared(top,includes);` + +The functions here are called in the BT version of MakeNewGrids which has three steps: + 1. Error Estimation and tagging - btTagging + 2. Bitree's actual bitmap generated/updated - btRefine + 3. AMReX updates grids based on bitree - btCalculateGrids +*/ + + +/** New Bittree mesh is generated. + * + * This makes use of BT library functions and as well as routines adapted + * from Flash-X that enforce Octree nesting. + */ +int btUnit::btRefine( BittreeAmr* const mesh, std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, MPI_Comm comm) { + + BL_PROFILE("Bittree-btRefine"); + + // Tree before refinement. With only one rank, lnblocks = nblocks. + auto tree0 = mesh->getTree(); + + // Mark leaves to be refined + for (int lev=max_crse; lev>=lbase; --lev) { + for (MFIter mfi(grids[lev], dmap[lev]); mfi.isValid(); ++mfi) { + int id = getBitid(mesh,false,lev,mfi.index()); + if (btTags[id]==1) { + if(!tree0->block_is_parent(id)) { + mesh->refine_mark(id, true); + } + } + } + } + + mesh->refine_reduce(comm); + mesh->refine_update(); + + btCheckRefine(mesh, btTags, max_crse, lbase, grids, dmap, comm); + + // Mark derefinement (parents who will nodetype change to leaf) + for (int lev=max_crse; lev>=lbase; --lev) { + for (MFIter mfi(grids[lev], dmap[lev]); mfi.isValid(); ++mfi) { + int id = getBitid(mesh,false,lev,mfi.index()); + if (btTags[id]==-1) { + if(tree0->block_is_parent(id)) { + mesh->refine_mark(id, true); + } + } + } + } + + mesh->refine_reduce(comm); + mesh->refine_update(); + + btCheckDerefine(mesh, btTags, max_crse, lbase, grids, dmap, comm); + + // return delta count + return static_cast( mesh->delta_count() ); +} + +/** Creates new box arrays to match the new Bittree mesh. + */ +void btUnit::btCalculateGrids(BittreeAmr* const mesh, int lbase, + int& new_finest, + Vector& new_grids, + Vector const& max_grid_size) { + BL_PROFILE("Bittree-btCalculateGrids"); + + auto tree1 = mesh->getTree(true); + int nlevs = tree1->levels(); + new_finest = nlevs - 1; + +//--Calculate the new grid layout and distribution map based on Bittree + for(int lev=lbase; lev<=new_finest; ++lev) { + btCalculateLevel(mesh, lev, new_grids[lev], + max_grid_size[lev]); + } +} + +/** Creates a box array based on Bittree. + */ +void btUnit::btCalculateLevel(BittreeAmr* const mesh, int lev, + BoxArray& ba, + IntVect const& max_grid_size) { + auto tree1 = mesh->getTree(true); + + //Bittree has its own indices for blocks which I call bitid; get + //the range of bitids for the level being made. Bitid range is + //contiguous for each level. + int id0 = tree1->level_id0(lev); + int id1 = tree1->level_id1(lev); + int nblocks = tree1->level_blocks(lev); + + BoxList bl; + + for(int i=id0; ilocate(i); + + if(b.level != lev) { + std::string msg = "Error identifying block in btCalculateGrids"; + //throw error? 
+ } + + IntVect coordVec{AMREX_D_DECL(static_cast(b.coord[0]), + static_cast(b.coord[1]), + static_cast(b.coord[2]))}; + IntVect lo = max_grid_size*coordVec; + IntVect hi = max_grid_size*(coordVec+1) - 1; + bl.push_back( Box{lo,hi} ); + } + + ba = BoxArray(bl); +} + +int btUnit::getBitid(BittreeAmr* const mesh, bool updated, + int lev, int idx_on_lev) { + return idx_on_lev + mesh->getTree(updated)->level_id0(lev); +} + +int btUnit::getIndex(BittreeAmr* const mesh, bool updated, + int lev, int bitid) { + return bitid - mesh->getTree(updated)->level_id0(lev); +} + + + +//--------------------------------------------------------------------- +// Local Routines +//--------------------------------------------------------------------- + +/** Implements the logic which ensures the generated Bittree adheres + * to a strict octree structure with no more than one level difference + * between surrounding leaf blocks. + */ +void btUnit::btCheckRefine(BittreeAmr* const mesh, std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, MPI_Comm comm) { + + BL_PROFILE("Bittree-btCheckRefine"); + + // Tree before refinement. + auto tree0 = mesh->getTree(); + + // Ref test is marked 1 if block needs a tag (and doesn't have one). + std::vector ref_test(tree0->id_upper_bound()); + + // Repeat is made true if another round is needed + bool repeat = false; + + do { + // Clear out ref_test + std::fill(ref_test.begin(),ref_test.end(),0); + + // Check neighbors - if any adjacent child of a neighbor is either a parent + // or marked for refinement, this block needs to be refined. + for (int lev=max_crse; lev>=lbase; --lev) { + for (MFIter mfi(grids[lev], dmap[lev]); mfi.isValid(); ++mfi) { + int id = getBitid(mesh,false,lev,mfi.index()); + auto b = tree0->locate(id); + if( !b.is_parent && btTags[id]!=1 ) { + bool needsTag = checkNeighborsRefine( mesh, b); + //amrex::Print() << "needsTag for " << id << " : " << needsTag <=lbase; --lev) { + for (MFIter mfi(grids[lev], dmap[lev]); mfi.isValid(); ++mfi) { + int id = getBitid(mesh,false,lev,mfi.index()); + if( ref_test[id]==1 && btTags[id]!=1 ) { + repeat = true; + btTags[id] = 1; + mesh->refine_mark(id,true); + } + } + } + + // If only processing local blocks, check all processors to see if + // a repeat is necessary, then reduce bittree to update on all ranks. + ParallelDescriptor::ReduceBoolOr(repeat); + + if(repeat) { + mesh->refine_reduce(comm); + mesh->refine_update(); + } + + } while(repeat); +} + + +/** Implements the logic which ensures the generated Bittree adheres + * to a strict octree structure with no more than one level difference + * between surrounding leaf blocks. + */ +void btUnit::btCheckDerefine(BittreeAmr* const mesh, std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, MPI_Comm comm) { + + BL_PROFILE("Bittree-btCheckDerefine"); + + // Tree before refinement. With only one rank, lnblocks = nblocks. + auto tree0 = mesh->getTree(); + + std::vector deref_test(tree0->id_upper_bound()); + + // Repeat is made true if another round is needed + bool repeat = false; + + // Repeat is left true if another round is needed + do { + // Turn deref_test to default 0 if block can't be derefined + deref_test = btTags; + + // Check neighbors - if any adjacent child of neighbor is either a parent + // or marked for refinement, do not derefine. 
+ for (int lev=max_crse; lev>=lbase; --lev) { + for (MFIter mfi(grids[lev], dmap[lev]); mfi.isValid(); ++mfi) { + int id = getBitid(mesh,false,lev,mfi.index()); + auto b = tree0->locate(id); + if( btTags[id]==-1 ) { + bool cantDeref = checkNeighborsRefine( mesh, b); + if(cantDeref) { + deref_test[id] = 0; + } + } + } + } + + // Unmark any blocks who cannot derefine (as per above check). + repeat = false; + for (int lev=max_crse; lev>=lbase; --lev) { + for (MFIter mfi(grids[lev], dmap[lev]); mfi.isValid(); ++mfi) { + int id = getBitid(mesh,false,lev,mfi.index()); + if( deref_test[id]==0 && btTags[id]==-1 ) { + repeat = true; + btTags[id] = 0; + + // Unmark for derefinement + mesh->refine_mark(id, false); + } + } + } + + // If only processing local blocks, check all processors to see if + // a repeat is necessary, then reduce bittree to update on all ranks. + ParallelDescriptor::ReduceBoolOr(repeat); + + if(repeat) { + mesh->refine_reduce_and(comm); + mesh->refine_update(); + } + + } while(repeat); +} + + +// Check all neighbors to see if their adjacent children are parents or marked for refinement. +bool btUnit::checkNeighborsRefine(BittreeAmr* const mesh, MortonTree::Block b) { + + BL_PROFILE("Bittree-checkNeighborsRefine"); + + auto tree0 = mesh->getTree(); + auto tree1 = mesh->getTree(true); + int nIdx[3], cIdx[3]; + unsigned childCoord_u[AMREX_SPACEDIM]; + + // Loop over neighbors + for(nIdx[2]= -1*K3D; nIdx[2]<= K3D; ++nIdx[2]) { + for(nIdx[1]= -1*K2D; nIdx[1]<= K2D; ++nIdx[1]) { + for(nIdx[0]= -1*K1D; nIdx[0]<= K1D; ++nIdx[0]) { + std::vector nCoord = neighIntCoords(mesh, b.level, b.coord, nIdx); + + // If neighbor is outside domain or otherwise invalid, continue. + if(AMREX_D_TERM(nCoord[0]<0, || nCoord[1]<0, || nCoord[2]<0 )) { + continue; + } + + // Identify neighbor from Bittree. + unsigned neighCoord_u[AMREX_SPACEDIM]; + for(unsigned d=0; d(nCoord[d]); + } + auto n = tree0->identify(b.level, neighCoord_u); + if(b.level==n.level && n.is_parent) { + // Loop over children of neighbor. + for(cIdx[2]= 0; cIdx[2]<= K3D; ++cIdx[2]) { + for(cIdx[1]= 0; cIdx[1]<= K2D; ++cIdx[1]) { + for(cIdx[0]= 0; cIdx[0]<= K1D; ++cIdx[0]) { + + // Only check adjacent children + if (( ((1-nIdx[0])/2)==cIdx[0] || nIdx[0] == 0 ) && + ( ((1-nIdx[1])/2)==cIdx[1] || nIdx[1] == 0 ) && + ( ((1-nIdx[2])/2)==cIdx[2] || nIdx[2] == 0 )) { + + // Identify child on updated tree + for(unsigned d=0; d(cIdx[d]); + } + auto c = tree1->identify(n.level+1, childCoord_u); + + // If child WILL be parent, return true + if( c.level==(b.level+1) && c.is_parent) { + return true; + } + } + }}} + } + }}} + + // Return false otherwise + return false; +} + +/** Calculate integer coordinates of neighbors, taking into account BCs. + * Currently assuming Periodic in all directions. + */ +std::vector btUnit::neighIntCoords(BittreeAmr* const mesh, + unsigned lev, unsigned* lcoord, int* gCell) { + auto tree = mesh->getTree(); + + std::vector neighCoord(AMREX_SPACEDIM); + +//--Calculate integer coordinates of neighbor in direction + for(unsigned d=0;d(lcoord[d]) + gCell[d]; + +//--Make sure not out-of-bounds. 
If periodic BCs, apply modulo + std::vector maxcoord(AMREX_SPACEDIM); + for(unsigned d=0;d(tree->top_size(d)) << lev; + + for(unsigned d=0;d= maxcoord[d]) { + if ( bcPeriodic[d] == true ) + neighCoord[d] = neighCoord[d] - maxcoord[d]; + else + neighCoord[d] = -1; + } + + } + + return neighCoord; +} + +} diff --git a/Src/Extern/Bittree/CMakeLists.txt b/Src/Extern/Bittree/CMakeLists.txt new file mode 100644 index 00000000000..d51454b2172 --- /dev/null +++ b/Src/Extern/Bittree/CMakeLists.txt @@ -0,0 +1,11 @@ +target_include_directories( amrex + PUBLIC + $) + +add_amrex_define(AMREX_USE_BITTREE NO_LEGACY) + +target_sources( amrex + PRIVATE + AMReX_Bittree.H + AMReX_Bittree.cpp + ) diff --git a/Src/Extern/Bittree/Make.package b/Src/Extern/Bittree/Make.package new file mode 100644 index 00000000000..0aefb6f300a --- /dev/null +++ b/Src/Extern/Bittree/Make.package @@ -0,0 +1,5 @@ +CEXE_headers += AMReX_Bittree.H +CEXE_sources += AMReX_Bittree.cpp + +VPATH_LOCATIONS += $(AMREX_HOME)/Src/Extern/Bittree +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Extern/Bittree diff --git a/Tests/Amr/Advection_AmrCore/Exec/GNUmakefile b/Tests/Amr/Advection_AmrCore/Exec/GNUmakefile index 284f9942c50..b7242d85957 100644 --- a/Tests/Amr/Advection_AmrCore/Exec/GNUmakefile +++ b/Tests/Amr/Advection_AmrCore/Exec/GNUmakefile @@ -14,6 +14,7 @@ USE_MPI = TRUE USE_OMP = FALSE USE_CUDA = FALSE USE_PARTICLES = TRUE +USE_BITTREE = FALSE Bpack := ./Make.package Blocs := . diff --git a/Tests/Amr/Advection_AmrCore/Exec/inputs_bittree b/Tests/Amr/Advection_AmrCore/Exec/inputs_bittree new file mode 100644 index 00000000000..b136cbdc951 --- /dev/null +++ b/Tests/Amr/Advection_AmrCore/Exec/inputs_bittree @@ -0,0 +1,81 @@ +# ***************************************************************** +# Run until nsteps == max_step or time == stop_time, +# whichever comes first +# ***************************************************************** +max_step = 1000 +stop_time = 2.0 + +# ***************************************************************** +# Are we restarting from an existing checkpoint file? 
+# ***************************************************************** +#amr.restart = chk00060 # restart from this checkpoint file + +# ***************************************************************** +# Problem size and geometry +# ***************************************************************** +geometry.prob_lo = 0.0 0.0 0.0 +geometry.prob_hi = 1.0 1.0 0.125 +geometry.is_periodic = 1 1 1 + +# ***************************************************************** +# VERBOSITY +# ***************************************************************** +amr.v = 1 # verbosity in Amr + +# ***************************************************************** +# Resolution and refinement +# ***************************************************************** +amr.n_cell = 64 64 8 +amr.max_level = 2 # maximum level number allowed -- + # number of levels = max_level + 1 + +amr.ref_ratio = 2 2 2 2 # refinement ratio between levels + +# ***************************************************************** +# Control of grid creation +# ***************************************************************** +# Blocking factor for grid creation in each dimension -- +# this ensures that every grid is coarsenable by a factor of 8 -- +# this is mostly relevant for multigrid performance +amr.blocking_factor_x = 8 +amr.blocking_factor_y = 8 +amr.blocking_factor_z = 8 + +amr.max_grid_size = 16 + +amr.regrid_int = 2 # how often to regrid + +amr.use_bittree = 1 + +# ***************************************************************** +# Time step control +# ***************************************************************** +adv.cfl = 0.7 # CFL constraint for explicit advection + +adv.do_subcycle = 1 # Do we subcycle in time? + +# ***************************************************************** +# Should we reflux at coarse-fine boundaries? +# ***************************************************************** +adv.do_reflux = 1 + +# ***************************************************************** +# Tagging - if phi > 1.01 at level 0, then refine +# if phi > 1.1 at level 1, then refine +# if phi > 1.5 at level 2, then refine +# ***************************************************************** +adv.phierr = 1.01 1.1 1.5 + +# ***************************************************************** +# Plotfile name and frequency +# ***************************************************************** +amr.plot_file = plt # root name of plot file +amr.plot_int = 10 # number of timesteps between plot files + # if negative then no plot files will be written + +# ***************************************************************** +# Checkpoint name and frequency +# ***************************************************************** +amr.chk_file = chk # root name of checkpoint file +amr.chk_int = -1 # number of timesteps between checkpoint files + # if negative then no checkpoint files will be written diff --git a/Tools/GNUMake/Make.defs b/Tools/GNUMake/Make.defs index 7b2ba129b83..8ec8832d1eb 100644 --- a/Tools/GNUMake/Make.defs +++ b/Tools/GNUMake/Make.defs @@ -1067,6 +1067,12 @@ ifeq ($(USE_HDF5),TRUE) include $(AMREX_HOME)/Tools/GNUMake/packages/Make.hdf5 endif +ifeq ($(USE_BITTREE),TRUE) + $(info Loading $(AMREX_HOME)/Tools/GNUMake/packages/Make.bittree...) + include $(AMREX_HOME)/Tools/GNUMake/packages/Make.bittree +endif + + ifneq ("$(wildcard $(AMREX_HOME)/Tools/GNUMake/Make.local)","") $(info Loading $(AMREX_HOME)/Tools/GNUMake/Make.local...) 
include $(AMREX_HOME)/Tools/GNUMake/Make.local diff --git a/Tools/GNUMake/packages/Make.bittree b/Tools/GNUMake/packages/Make.bittree new file mode 100644 index 00000000000..98758a915c2 --- /dev/null +++ b/Tools/GNUMake/packages/Make.bittree @@ -0,0 +1,16 @@ + +CPPFLAGS += -DAMREX_USE_BITTREE +include $(AMREX_HOME)/Src/Extern/Bittree/Make.package + +ifndef AMREX_BITTREE_HOME +ifdef BITTREE_$(DIM)D_HOME + AMREX_BITTREE_HOME = $(BITTREE_$(DIM)D_HOME) +endif +endif + +ifdef AMREX_BITTREE_HOME + BITTREE_ABSPATH = $(abspath $(AMREX_BITTREE_HOME)) + INCLUDE_LOCATIONS += $(BITTREE_ABSPATH)/include + LIBRARY_LOCATIONS += $(BITTREE_ABSPATH)/lib + LIBRARIES += -Wl,-rpath,$(BITTREE_ABSPATH)/lib -lbittree +endif diff --git a/Tools/libamrex/configure.py b/Tools/libamrex/configure.py index 42493e4647e..1545f86dfb2 100755 --- a/Tools/libamrex/configure.py +++ b/Tools/libamrex/configure.py @@ -116,6 +116,10 @@ def configure(argv): help="Only relevant to Amr/AmrLevel based codes that need to read probin file or call amrex_probinit", choices=["yes","no"], default="yes") + parser.add_argument("--enable-bittree", + help="Enable Bittree mode [default=no]", + choices=["yes","no"], + default="no") args = parser.parse_args() if args.with_fortran == "no": @@ -154,6 +158,7 @@ def configure(argv): f.write("USE_COMPILE_PIC = {}\n".format("FALSE" if args.enable_pic == "no" else "TRUE")) f.write("CUDA_ARCH = " + args.cuda_arch.strip() + "\n") f.write("AMREX_NO_PROBINIT = {}\n".format("TRUE" if args.enable_probinit == "no" else "FALSE")) + f.write("USE_BITTREE = {}\n".format("TRUE" if args.enable_bittree == "yes" else "FALSE")) f.write("\n") fin = open("GNUmakefile.in","r") From e470d3350ed34686d46a461f7c5e704a0e027efc Mon Sep 17 00:00:00 2001 From: drangara <69211175+drangara@users.noreply.github.com> Date: Mon, 2 Oct 2023 13:12:50 -0400 Subject: [PATCH 007/186] simplify how 2d surface integrals are computed (#3571) Previously, the 2D surface integrals were generated after calculating the intersection points of the EB on each of the cell faces. With this change, the intersection points are determined directly from `bcent`, `bnorm` and `barea`. This approach seems to simplify the need to handle special cases. 
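
For reference, the geometric fact behind the new approach: in 2D the embedded
boundary inside a cut cell is a straight segment, so its two endpoints lie half
a segment length away from the boundary centroid along the tangent direction
(the unit normal rotated by 90 degrees). The snippet below is only a minimal,
standalone sketch of that idea with made-up cell-local numbers; it is not the
AMReX kernel itself, and all names and values in it are illustrative.

```cpp
// Standalone sketch: recover the 2D EB segment endpoints from the boundary
// centroid (bcent), unit normal (bnorm) and boundary area/length (barea).
// The numbers below are invented example data, not from any AMReX test.
#include <array>
#include <cstdio>

int main ()
{
    std::array<double,2> bcent = {0.10, -0.05}; // EB centroid in cell-local coords
    std::array<double,2> bnorm = {0.60,  0.80}; // unit normal of the EB segment
    double barea = 0.5;                         // segment length ("area" in 2D)

    // Tangent = normal rotated by 90 degrees.
    double tanx =  bnorm[1];
    double tany = -bnorm[0];

    // Endpoints sit half a segment length from the centroid along the tangent.
    double x0 = bcent[0] - 0.5*barea*tanx, x1 = bcent[0] + 0.5*barea*tanx;
    double y0 = bcent[1] - 0.5*barea*tany, y1 = bcent[1] + 0.5*barea*tany;

    std::printf("endpoints: (%g, %g) and (%g, %g)\n", x0, y0, x1, y1);
    return 0;
}
```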
--- Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H | 96 ++++--------------- .../MLMG/AMReX_MLNodeLaplacian_eb.cpp | 9 +- 2 files changed, 20 insertions(+), 85 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H index 1de55f8a63d..d1ae9e0b7ed 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H @@ -2282,97 +2282,35 @@ void mlndlap_set_integral_eb (int i, int j, int, Array4 const& intg, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_set_surface_integral_eb (int i, int j, int, Array4 const& sintg, - Array4 const& flag, Array4 const& vol, - Array4 const& ax, Array4 const& ay, + Array4 const& flag, Array4 const& bcen, - Array4 const& barea) noexcept + Array4 const& barea, + Array4 const& bnorm) noexcept { if (flag(i,j,0).isCovered() || flag(i,j,0).isRegular()) { sintg(i,j,0,i_B_x ) = Real(0.); sintg(i,j,0,i_B_y ) = Real(0.); sintg(i,j,0,i_B_xy) = Real(0.); } else { - Real axm = ax(i,j,0); - Real axp = ax(i+1,j,0); - Real aym = ay(i,j,0); - Real ayp = ay(i,j+1,0); + Real bcx = bcen(i,j,0,0); + Real bcy = bcen(i,j,0,1); - Real apnorm = std::sqrt((axm-axp)*(axm-axp) + (aym-ayp)*(aym-ayp)); - if (apnorm == Real(0.)) { - amrex::Abort("amrex_mlndlap_set_surface_integral: we are in trouble"); - } - - if (vol(i,j,0) >= almostone) { - sintg(i,j,0,i_B_x ) = Real(0.); - sintg(i,j,0,i_B_y ) = Real(0.); - sintg(i,j,0,i_B_xy) = Real(0.); - if (axm < Real(1.)) { - sintg(i,j,0,i_B_x) = Real(-0.5)*barea(i,j,0); - } else if (aym < Real(1.)) { - sintg(i,j,0,i_B_y) = Real(-0.5)*barea(i,j,0); - } else if (axp < Real(1.)) { - sintg(i,j,0,i_B_x) = Real( 0.5)*barea(i,j,0); - } else if (ayp < Real(1.)) { - sintg(i,j,0,i_B_y) = Real( 0.5)*barea(i,j,0); - } else { - amrex::Abort("amrex_mlndlap_set_surface_integral: we are in trouble"); - } - } else { - Real apnorminv = Real(1.)/apnorm; - Real anrmx = (axm-axp) * apnorminv; // pointing to the wall - Real anrmy = (aym-ayp) * apnorminv; - - Real bcx = bcen(i,j,0,0); - Real bcy = bcen(i,j,0,1); - - Real c = -(bcx * anrmx + bcy * anrmy); - - GpuArray pts; //intersection points - int np = 0; - if (std::abs(anrmx) <= almostzero) { - pts[np++] = RealVect{Real(-0.5), Real(-c + Real(0.5)*anrmx)/anrmy}; - pts[np++] = RealVect{Real( 0.5), Real(-c - Real(0.5)*anrmx)/anrmy}; - } else if (std::abs(anrmy) <= almostzero) { - pts[np++] = RealVect{Real(-c + Real(0.5)*anrmy)/anrmx, Real(-0.5)}; - pts[np++] = RealVect{Real(-c - Real(0.5)*anrmy)/anrmx, Real( 0.5)}; - } else { - if ( (axm > Real(0.) && axm < Real(1.)) - || (axm > Real(0.) && aym == Real(0.)) - || (axm > Real(0.) && ayp == Real(0.))) { - pts[np++] = RealVect{Real(-0.5), Real(-c + Real(0.5)*anrmx)/anrmy}; - } - if ( (axp > Real(0.) && axp < Real(1.)) - || (axp > Real(0.) && aym == Real(0.)) - || (axp > Real(0.) && ayp == Real(0.))) { - pts[np++] = RealVect{Real( 0.5), Real(-c - Real(0.5)*anrmx)/anrmy}; - } - if ( (aym > Real(0.) && aym < Real(1.)) - || (aym > Real(0.) && axm == Real(0.)) - || (aym > Real(0.) && axp == Real(0.))) { - pts[np++] = RealVect{Real(-c + Real(0.5)*anrmy)/anrmx, Real(-0.5)}; - } - if ( (ayp > Real(0.) && ayp < Real(1.)) - || (ayp > Real(0.) && axm == Real(0.)) - || (ayp > Real(0.) 
&& axp == Real(0.))) { - pts[np++] = RealVect{Real(-c - Real(0.5)*anrmy)/anrmx, Real( 0.5)}; - } - } + Real btanx = bnorm(i,j,0,1); + Real btany = -bnorm(i,j,0,0); - if (np != 2) { - amrex::Abort("amrex_mlndlap_set_surface_integral: we are in trouble"); - } + Real x0 = bcx - Real(0.5)*barea(i,j,0)*btanx; + Real x1 = bcx + Real(0.5)*barea(i,j,0)*btanx; - Real x0 = pts[0][0], x1 = pts[1][0]; - Real y0 = pts[0][1], y1 = pts[1][1]; + Real y0 = bcy - Real(0.5)*barea(i,j,0)*btany; + Real y1 = bcy + Real(0.5)*barea(i,j,0)*btany; - Real Bx = barea(i,j,0)*Real(0.5)*(x1 + x0); - Real By = barea(i,j,0)*Real(0.5)*(y1 + y0); - Real Bxy = barea(i,j,0)*(x0*y0 + (x0*(y1 - y0) + y0*(x1 - x0))/Real(2.) + (x1 - x0)*(y1 - y0)/Real(3.)); + Real Bx = barea(i,j,0)*Real(0.5)*(x1 + x0); + Real By = barea(i,j,0)*Real(0.5)*(y1 + y0); + Real Bxy = barea(i,j,0)*(x0*y0 + (x0*(y1 - y0) + y0*(x1 - x0))/Real(2.) + (x1 - x0)*(y1 - y0)/Real(3.)); - sintg(i,j,0,i_B_x ) = Bx; - sintg(i,j,0,i_B_y ) = By; - sintg(i,j,0,i_B_xy) = Bxy; - } + sintg(i,j,0,i_B_x ) = Bx; + sintg(i,j,0,i_B_y ) = By; + sintg(i,j,0,i_B_xy) = Bxy; } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_eb.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_eb.cpp index 49f80ad4c11..3088a29cb24 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_eb.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_eb.cpp @@ -99,10 +99,9 @@ MLNodeLaplacian::buildSurfaceIntegral () { const int ncomp = sintg->nComp(); const auto& flags = factory->getMultiEBCellFlagFab(); - const auto& vfrac = factory->getVolFrac(); - const auto& area = factory->getAreaFrac(); const auto& bcent = factory->getBndryCent(); const auto& barea = factory->getBndryArea(); + const auto& bnorm = factory->getBndryNormal(); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling().SetDynamic(true); } @@ -128,14 +127,12 @@ MLNodeLaplacian::buildSurfaceIntegral () }); } else { Array4 const& flagarr = flags.const_array(mfi); - Array4 const& vfracarr = vfrac.const_array(mfi); - Array4 const& axarr = area[0]->const_array(mfi); - Array4 const& ayarr = area[1]->const_array(mfi); Array4 const& bcarr = bcent.const_array(mfi); Array4 const& baarr = barea.const_array(mfi); + Array4 const& bnarr = bnorm.const_array(mfi); AMREX_HOST_DEVICE_FOR_3D(bx, i, j, k, { - mlndlap_set_surface_integral_eb(i,j,k,garr,flagarr,vfracarr,axarr,ayarr,bcarr,baarr); + mlndlap_set_surface_integral_eb(i,j,k,garr,flagarr,bcarr,baarr,bnarr); }); } } From 87556458d867cb9cb2f148b4bcfc920c67bc918a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 3 Oct 2023 17:13:36 -0700 Subject: [PATCH 008/186] NodeABecLaplacian: Reuse (#3579) Add reuse capability to NodeABecLaplacian. More specifically, we need to average down coefficients again. 
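
Put differently, the operator now follows a simple dirty-flag pattern: the
coefficient setters mark the operator as needing an update, and `update()`
repeats only the coefficient averaging so the same operator and solver objects
can be reused across solves. The mock below is just a self-contained
illustration of that pattern; it mirrors the member names touched in this
patch but is not the real class.

```cpp
// Self-contained mock of the reuse pattern added here (not the real AMReX class):
// setters flip a dirty flag; update() only re-averages the coefficients.
#include <iostream>

struct MockNodeABecLaplacian
{
    bool m_needs_update = true;

    void setACoeffs (double /*a*/) { m_needs_update = true; }
    void setBCoeffs (double /*b*/) { m_needs_update = true; }

    bool needsUpdate () const { return m_needs_update; }

    void prepareForSolve () { averageDownCoeffs(); m_needs_update = false; } // first solve
    void update ()          { averageDownCoeffs(); m_needs_update = false; } // reuse path

    void averageDownCoeffs () { std::cout << "averaging down coefficients\n"; }
};

int main ()
{
    MockNodeABecLaplacian linop;
    linop.setACoeffs(1.0);
    linop.setBCoeffs(2.0);
    linop.prepareForSolve();                      // full setup before the first solve

    linop.setBCoeffs(3.0);                        // coefficients change later
    if (linop.needsUpdate()) { linop.update(); }  // cheap refresh instead of a rebuild
    return 0;
}
```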
--- .codespell-ignore-words | 1 + Src/AmrCore/AMReX_AmrMesh.H | 2 +- Src/Base/AMReX_CoordSys.H | 2 +- Src/Base/AMReX_FACopyDescriptor.H | 2 +- Src/Base/AMReX_FArrayBox.H | 6 +++--- Src/Base/AMReX_IArrayBox.H | 2 +- Src/Base/AMReX_NonLocalBC.H | 2 +- Src/EB/AMReX_EB_StateRedistribute.cpp | 4 ++-- Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp | 2 +- Src/Extern/HYPRE/AMReX_HypreSolver.H | 2 +- Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H | 6 ++++++ .../MLMG/AMReX_MLNodeABecLaplacian.cpp | 14 ++++++++++++++ Tests/HDF5Benchmark/sz.config | 2 +- Tools/CMake/AMReXBuildInfo.cmake | 2 +- Tools/CMake/AMReXClangTidy.cmake | 2 +- Tools/C_scripts/mkdep | 2 +- 16 files changed, 37 insertions(+), 16 deletions(-) diff --git a/.codespell-ignore-words b/.codespell-ignore-words index 4e265268e63..4069bf5a026 100644 --- a/.codespell-ignore-words +++ b/.codespell-ignore-words @@ -27,6 +27,7 @@ parms pres ptd recuse +shft siz structed te diff --git a/Src/AmrCore/AMReX_AmrMesh.H b/Src/AmrCore/AMReX_AmrMesh.H index 2cb17d720f3..f5d49f5c5da 100644 --- a/Src/AmrCore/AMReX_AmrMesh.H +++ b/Src/AmrCore/AMReX_AmrMesh.H @@ -170,7 +170,7 @@ public: void SetGridEff (Real eff) noexcept { grid_eff = eff; } void SetNProper (int n) noexcept { n_proper = n; } - //! Set ref_ratio would require rebuiling Geometry objects. + //! Set ref_ratio would require rebuilding Geometry objects. void SetFinestLevel (int new_finest_level) noexcept { finest_level = new_finest_level; } void SetDistributionMap (int lev, const DistributionMapping& dmap_in) noexcept; diff --git a/Src/Base/AMReX_CoordSys.H b/Src/Base/AMReX_CoordSys.H index d558ba053db..ab946ffa3d0 100644 --- a/Src/Base/AMReX_CoordSys.H +++ b/Src/Base/AMReX_CoordSys.H @@ -54,7 +54,7 @@ public: BL_ASSERT(c_sys != undef); return (c_sys == RZ); } - //! Is CoordType == cartesion? + //! Is CoordType == cartesian? [[nodiscard]] bool IsCartesian () const noexcept { BL_ASSERT(c_sys != undef); return (c_sys == cartesian); } diff --git a/Src/Base/AMReX_FACopyDescriptor.H b/Src/Base/AMReX_FACopyDescriptor.H index 5194ca7f876..7e1e383d237 100644 --- a/Src/Base/AMReX_FACopyDescriptor.H +++ b/Src/Base/AMReX_FACopyDescriptor.H @@ -698,7 +698,7 @@ FabArrayCopyDescriptor::CollectData () amrex::The_Arena()->free(md_recv_data); } - // Wait and upack data + // Wait and unpack data if (N_rcvs > 0) { Vector stats(N_rcvs); diff --git a/Src/Base/AMReX_FArrayBox.H b/Src/Base/AMReX_FArrayBox.H index 45e49ebab93..084b38ce46b 100644 --- a/Src/Base/AMReX_FArrayBox.H +++ b/Src/Base/AMReX_FArrayBox.H @@ -23,7 +23,7 @@ class FArrayBox; * only want to write out 32 bit FABs. * * With the exception of the enumeration constants, this class is -* primarily for FArrayBox implementors; i.e. user's shouldn't +* primarily for FArrayBox implementers; i.e. user's shouldn't * call any of the member functions in this class directly. */ @@ -241,7 +241,7 @@ public: /** * \brief Construct an initial FAB with the data space allocated but - * not inititialized. ncomp is the number of components + * not initialized. ncomp is the number of components * (variables) at each data point in the Box. */ explicit FArrayBox (const Box& b, @@ -409,7 +409,7 @@ public: /** * \brief Set the FABio::Format in the program. * This is the preferred way to set the output format - * in "new" FABs. When desiging new programs, this should + * in "new" FABs. When designing new programs, this should * be the only function that needs to be called in order * to set the format. 
*/ diff --git a/Src/Base/AMReX_IArrayBox.H b/Src/Base/AMReX_IArrayBox.H index 4d39ace1012..b5240395f02 100644 --- a/Src/Base/AMReX_IArrayBox.H +++ b/Src/Base/AMReX_IArrayBox.H @@ -58,7 +58,7 @@ public: /** * \brief Construct an initial FAB with the data space allocated but - * not inititialized. ncomp is the number of components + * not initialized. ncomp is the number of components * (variables) at each data point in the Box. */ explicit IArrayBox (const Box& b, diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H index fc7f4cb6583..f7f22a67198 100644 --- a/Src/Base/AMReX_NonLocalBC.H +++ b/Src/Base/AMReX_NonLocalBC.H @@ -264,7 +264,7 @@ struct MultiBlockCommMetaData : FabArrayBase::CommMetaData { // [concept.FabProjection] // -//! \brief This type trait tests if a type P is a projetion for FAB. +//! \brief This type trait tests if a type P is a projection for FAB. template struct IsFabProjection : IsCallableR, Dim3, int> diff --git a/Src/EB/AMReX_EB_StateRedistribute.cpp b/Src/EB/AMReX_EB_StateRedistribute.cpp index 79e661d2c90..faa77539f90 100644 --- a/Src/EB/AMReX_EB_StateRedistribute.cpp +++ b/Src/EB/AMReX_EB_StateRedistribute.cpp @@ -176,7 +176,7 @@ MLStateRedistribute ( Box const& bx, int ncomp, if (domain_per_grown.contains(IntVect(AMREX_D_DECL(r,s,t)))) { - // Initialize so that the slope stencil goes from -1:1 in each diretion + // Initialize so that the slope stencil goes from -1:1 in each direction int nx = 1; int ny = 1; int nz = 1; // Do we have enough extent in each coordinate direction to use the 3x3x3 stencil @@ -511,7 +511,7 @@ StateRedistribute ( Box const& bx, int ncomp, bool extdir_khi = (d_bcrec_ptr[n].hi(2) == amrex::BCType::ext_dir || d_bcrec_ptr[n].hi(2) == amrex::BCType::hoextrap); #endif - // Initialize so that the slope stencil goes from -1:1 in each diretion + // Initialize so that the slope stencil goes from -1:1 in each direction int nx = 1; int ny = 1; int nz = 1; // Do we have enough extent in each coordinate direction to use the 3x3x3 stencil diff --git a/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp b/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp index 79b35a5d58f..448869ca359 100644 --- a/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp @@ -47,7 +47,7 @@ HypreNodeLap::HypreNodeLap (const BoxArray& grids_, const DistributionMapping& d Int nnodes_proc = fill_local_node_id(); // At this point, local_node_id stores the ids local to each box. - // nnodes_grid stroes the number of nodes in each box. nnodes_proc is + // nnodes_grid stores the number of nodes in each box. nnodes_proc is // the number of nodes on this MPI process. If a nodal is invalid, its // id is invalid (i.e., a very negative number). Note that the data // type of local_node_id is int, not HYPRE_Int for performance on GPU. diff --git a/Src/Extern/HYPRE/AMReX_HypreSolver.H b/Src/Extern/HYPRE/AMReX_HypreSolver.H index 205518f1f16..039373d5ab1 100644 --- a/Src/Extern/HYPRE/AMReX_HypreSolver.H +++ b/Src/Extern/HYPRE/AMReX_HypreSolver.H @@ -220,7 +220,7 @@ HypreSolver::HypreSolver (Vector const& a_index_type, fill_local_id(a_marker); // At this point, m_local_id stores the ids local to each box. - // m_nrows_grid stroes the number of unique points in each box. + // m_nrows_grid stores the number of unique points in each box. // m_nrows_proc is the number of rowss for all variables on this MPI // process. If a point is invalid, its id is invalid (i.e., a very // negative number). 
Note that the data type of local_node_id is int, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H index b261c5bb3f2..6caed940fa9 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H @@ -65,12 +65,18 @@ public: void prepareForSolve () final; + [[nodiscard]] bool needsUpdate () const final { return m_needs_update; } + + void update () final; + void averageDownCoeffs (); void averageDownCoeffsToCoarseAmrLevel (int flev); void averageDownCoeffsSameAmrLevel (int amrlev); private: + bool m_needs_update = true; + Real m_a_scalar = std::numeric_limits::quiet_NaN(); Real m_b_scalar = std::numeric_limits::quiet_NaN(); Vector > m_a_coeffs; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp index c58fb0e6da9..07c516992f6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp @@ -55,6 +55,7 @@ void MLNodeABecLaplacian::setACoeffs (int amrlev, Real a_acoef) { m_a_coeffs[amrlev][0].setVal(a_acoef); + m_needs_update = true; } void @@ -62,12 +63,14 @@ MLNodeABecLaplacian::setACoeffs (int amrlev, const MultiFab& a_acoef) { const int ncomp = getNComp(); m_a_coeffs[amrlev][0].LocalCopy(a_acoef, 0, 0, ncomp, IntVect(0)); + m_needs_update = true; } void MLNodeABecLaplacian::setBCoeffs (int amrlev, Real a_bcoef) { m_b_coeffs[amrlev][0].setVal(a_bcoef); + m_needs_update = true; } void @@ -75,6 +78,7 @@ MLNodeABecLaplacian::setBCoeffs (int amrlev, const MultiFab& a_bcoef) { const int ncomp = getNComp(); m_b_coeffs[amrlev][0].LocalCopy(a_bcoef, 0, 0, ncomp, IntVect(0)); + m_needs_update = true; } void @@ -251,6 +255,16 @@ MLNodeABecLaplacian::prepareForSolve () buildMasks(); averageDownCoeffs(); + + m_needs_update = false; +} + +void +MLNodeABecLaplacian::update () +{ + BL_PROFILE("MLNodeABecLaplacian::prepareForSolve()"); + averageDownCoeffs(); + m_needs_update = false; } void diff --git a/Tests/HDF5Benchmark/sz.config b/Tests/HDF5Benchmark/sz.config index dd363085210..f2abf5529e1 100644 --- a/Tests/HDF5Benchmark/sz.config +++ b/Tests/HDF5Benchmark/sz.config @@ -76,7 +76,7 @@ relBoundRatio = 1E-9 pw_relBoundRatio = 1E-2 #point-wise relative-error-bound segment size -#The larger this value, the lower overhead of maitaining error bound information for each segment/block, while the stricter absolute bound used to control the point-wise relative-error-bound. The block size will be M^K, where K is the # dimensions of the data and M is an integer such that (M-1)^K < segment_size <= M^K. For instance, for a 2D data, segment_size=32 means the whole data set will be split into multiple small blocks each being 6x6. For a 3D data, segment_size=32 means the whole data set is split into a lot of 4x4x4 blocks, each of which has an individual compression precision/error. +#The larger this value, the lower overhead of maintaining error bound information for each segment/block, while the stricter absolute bound used to control the point-wise relative-error-bound. The block size will be M^K, where K is the # dimensions of the data and M is an integer such that (M-1)^K < segment_size <= M^K. For instance, for a 2D data, segment_size=32 means the whole data set will be split into multiple small blocks each being 6x6. 
For a 3D data, segment_size=32 means the whole data set is split into a lot of 4x4x4 blocks, each of which has an individual compression precision/error. segment_size = 32 #point-wise relative-error-bound type diff --git a/Tools/CMake/AMReXBuildInfo.cmake b/Tools/CMake/AMReXBuildInfo.cmake index 36c70057871..16795110658 100644 --- a/Tools/CMake/AMReXBuildInfo.cmake +++ b/Tools/CMake/AMReXBuildInfo.cmake @@ -189,7 +189,7 @@ function (generate_buildinfo _target _git_dir) configure_file( ${AMREX_BUILDINFO_IFILE} ${PROJECT_BINARY_DIR}/${_target}/AMReX_buildInfo.cpp @ONLY) - # add a re-usable target + # add a reusable target add_library(buildInfo${_target} STATIC) add_library(buildInfo::${_target} ALIAS buildInfo${_target}) diff --git a/Tools/CMake/AMReXClangTidy.cmake b/Tools/CMake/AMReXClangTidy.cmake index 4f002bc3820..3021d5939d7 100644 --- a/Tools/CMake/AMReXClangTidy.cmake +++ b/Tools/CMake/AMReXClangTidy.cmake @@ -9,7 +9,7 @@ macro(setup_clang_tidy) if (_tmp MATCHES "LLVM version ([0-9\.]+)") message(STATUS "Found clang-tidy ${CMAKE_MATCH_1}") if ("${CMAKE_MATCH_1}" VERSION_GREATER_EQUAL 12.0.0) - # Cofig file not supported in earlier versions + # Config file not supported in earlier versions set(AMReX_CLANG_TIDY_CONFIG_FILE_NAME ${PROJECT_SOURCE_DIR}/.clang-tidy) endif() endif() diff --git a/Tools/C_scripts/mkdep b/Tools/C_scripts/mkdep index 32552d44837..b5c039b212d 100755 --- a/Tools/C_scripts/mkdep +++ b/Tools/C_scripts/mkdep @@ -7,7 +7,7 @@ # Notes: * -I defines a search path for include files # * -DBG turn on debug flag # + -fortran: parse fortran style include directives -# * -X means disgard entries with this path (NOT IMPLEMENTED) +# * -X means discard entries with this path (NOT IMPLEMENTED) # * searches current directory only if -I. is in search # path or #include directive uses double quotes rather # than angle brackets. From d197c8f5db9eb8123804ec849bd5b67728a4172f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 5 Oct 2023 09:26:05 -0700 Subject: [PATCH 009/186] Docs: Link pyAMReX (#3582) Link Python interfaces after Fortran interfaces. --- Docs/sphinx_documentation/source/Python_Chapter.rst | 13 +++++++++++++ Docs/sphinx_documentation/source/index.rst | 1 + 2 files changed, 14 insertions(+) create mode 100644 Docs/sphinx_documentation/source/Python_Chapter.rst diff --git a/Docs/sphinx_documentation/source/Python_Chapter.rst b/Docs/sphinx_documentation/source/Python_Chapter.rst new file mode 100644 index 00000000000..76061ea2ebe --- /dev/null +++ b/Docs/sphinx_documentation/source/Python_Chapter.rst @@ -0,0 +1,13 @@ +.. role:: cpp(code) + :language: c++ + +.. _Chap:Python: + +Python Interface +================ + + +The core of AMReX is written in C++. +For users who want to write all of their programs in Python, or C++ application developers that like to add Python interfaces to their applications for scripting, rapid prototyping, code coupling and/or AI/ML workflows, many AMReX classes, functions and all data containers are now also available. + +Please see `pyAMReX `__ (`manual `__) for further details. 
diff --git a/Docs/sphinx_documentation/source/index.rst b/Docs/sphinx_documentation/source/index.rst index 8cdced26a19..b748a6394a5 100644 --- a/Docs/sphinx_documentation/source/index.rst +++ b/Docs/sphinx_documentation/source/index.rst @@ -51,6 +51,7 @@ Documentation on migration from BoxLib is available in the AMReX repository at D LinearSolvers_Chapter Particle_Chapter Fortran_Chapter + Python_Chapter EB_Chapter TimeIntegration_Chapter GPU_Chapter From a9dad648b5336d33ede4ee4e1b9b4456cbce44af Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 6 Oct 2023 09:45:29 -0700 Subject: [PATCH 010/186] Fix ICC CI by Freeing up Disk Space (#3583) The ICC CI has been failing in the last few days with following message, `System.IO.IOException: No space left on device`. So we need to free up more disk space. --- .github/workflows/intel.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 3d701c9adcd..6133e666fad 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -200,6 +200,7 @@ jobs: - uses: actions/checkout@v3 - name: Dependencies run: | + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_dpcpp.sh sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic .github/workflows/dependencies/dependencies_ccache.sh From fd2dfa282a30c497b692398cc0db9b18e6c4de85 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 6 Oct 2023 09:46:30 -0700 Subject: [PATCH 011/186] Disable CCache in Windows CIs (#3566) CCache has stopped working on Windows since 2023-09-20 for unknown reasons. After a couple of hours of investigation, I still could not get it to work. So let's disable it for now. --- .github/workflows/cleanup-cache.yml | 2 +- .github/workflows/windows.yml | 119 ++++++++++++++-------------- 2 files changed, 61 insertions(+), 60 deletions(-) diff --git a/.github/workflows/cleanup-cache.yml b/.github/workflows/cleanup-cache.yml index 2a1a2f254a3..97db5730d25 100644 --- a/.github/workflows/cleanup-cache.yml +++ b/.github/workflows/cleanup-cache.yml @@ -2,7 +2,7 @@ name: CleanUpCache on: workflow_run: - workflows: [LinuxClang, cuda, LinuxGcc, hip, Hypre, intel, macos, PETSc, SUNDIALS, windows, CodeQL, smoke, apps] + workflows: [LinuxClang, cuda, LinuxGcc, hip, Hypre, intel, macos, PETSc, SUNDIALS, CodeQL, smoke, apps] types: - completed diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 985e143108f..18dbce32b5b 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -14,28 +14,28 @@ jobs: steps: - uses: actions/checkout@v3 - uses: seanmiddleditch/gha-setup-ninja@master - - name: Set Up Cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} - restore-keys: | - ccache-${{ github.workflow }}-${{ github.job }}-git- - - name: Install Ccache - run: | - Invoke-WebRequest https://github.com/ccache/ccache/releases/download/v4.8/ccache-4.8-windows-x86_64.zip -OutFile ccache-4.8-windows-x86_64.zip - Expand-Archive ccache-4.8-windows-x86_64.zip + #- name: Set Up Cache + # uses: actions/cache@v3 + # with: + # path: ~/.ccache + # key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} + # restore-keys: | + # ccache-${{ github.workflow }}-${{ github.job }}-git- + #- name: Install Ccache + # run: | + # Invoke-WebRequest https://github.com/ccache/ccache/releases/download/v4.8/ccache-4.8-windows-x86_64.zip -OutFile 
ccache-4.8-windows-x86_64.zip + # Expand-Archive ccache-4.8-windows-x86_64.zip - name: Build & Install run: | - $ccachepath = Join-Path $pwd "ccache-4.8-windows-x86_64" - $Env:PATH += ";$ccachepath" - $ccachecachedir = Join-Path $HOME ".ccache" - $Env:CCACHE_DIR="$ccachecachedir" - $Env:CCACHE_DIR - $Env:CCACHE_COMPRESS='1' - $Env:CCACHE_COMPRESSLEVEL='10' - $Env:CCACHE_MAXSIZE='105M' - ccache -z + #$ccachepath = Join-Path $pwd "ccache-4.8-windows-x86_64" + #$Env:PATH += ";$ccachepath" + #$ccachecachedir = Join-Path $HOME ".ccache" + #$Env:CCACHE_DIR="$ccachecachedir" + #$Env:CCACHE_DIR + #$Env:CCACHE_COMPRESS='1' + #$Env:CCACHE_COMPRESSLEVEL='10' + #$Env:CCACHE_MAXSIZE='105M' + #ccache -z cmake -S . -B build ` -G "Ninja" ` @@ -45,11 +45,11 @@ jobs: -DAMReX_EB=OFF ` -DAMReX_ENABLE_TESTS=ON ` -DAMReX_FORTRAN=OFF ` - -DAMReX_MPI=OFF ` - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DAMReX_MPI=OFF + #-DCMAKE_CXX_COMPILER_LAUNCHER=ccache cmake --build build --config Debug -j 2 - ccache -s + #ccache -s # Build libamrex and all test (static) test_msvc_static: @@ -58,27 +58,27 @@ jobs: steps: - uses: actions/checkout@v3 - uses: seanmiddleditch/gha-setup-ninja@master - - name: Set Up Cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} - restore-keys: | - ccache-${{ github.workflow }}-${{ github.job }}-git- - - name: Install Ccache - run: | - Invoke-WebRequest https://github.com/ccache/ccache/releases/download/v4.8/ccache-4.8-windows-x86_64.zip -OutFile ccache-4.8-windows-x86_64.zip - Expand-Archive ccache-4.8-windows-x86_64.zip + #- name: Set Up Cache + # uses: actions/cache@v3 + # with: + # path: ~/.ccache + # key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} + # restore-keys: | + # ccache-${{ github.workflow }}-${{ github.job }}-git- + #- name: Install Ccache + # run: | + # Invoke-WebRequest https://github.com/ccache/ccache/releases/download/v4.8/ccache-4.8-windows-x86_64.zip -OutFile ccache-4.8-windows-x86_64.zip + # Expand-Archive ccache-4.8-windows-x86_64.zip - name: Build & Install run: | - $ccachepath = Join-Path $pwd "ccache-4.8-windows-x86_64" - $Env:PATH += ";$ccachepath" - $ccachecachedir = Join-Path $HOME ".ccache" - $Env:CCACHE_DIR="$ccachecachedir" - $Env:CCACHE_COMPRESS='1' - $Env:CCACHE_COMPRESSLEVEL='10' - $Env:CCACHE_MAXSIZE='135M' - ccache -z + #$ccachepath = Join-Path $pwd "ccache-4.8-windows-x86_64" + #$Env:PATH += ";$ccachepath" + #$ccachecachedir = Join-Path $HOME ".ccache" + #$Env:CCACHE_DIR="$ccachecachedir" + #$Env:CCACHE_COMPRESS='1' + #$Env:CCACHE_COMPRESSLEVEL='10' + #$Env:CCACHE_MAXSIZE='135M' + #ccache -z cmake -S . 
-B build ` -G "Ninja" ` @@ -87,11 +87,11 @@ jobs: -DAMReX_EB=ON ` -DAMReX_ENABLE_TESTS=ON ` -DAMReX_FORTRAN=OFF ` - -DAMReX_MPI=OFF ` - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DAMReX_MPI=OFF + #-DCMAKE_CXX_COMPILER_LAUNCHER=ccache cmake --build build --config RelWithDebInfo -j 2 - ccache -s + #ccache -s # Build libamrex and all tests tests_clang: @@ -116,17 +116,18 @@ jobs: -DAMReX_OMP=ON cmake --build build --config Release -j 2 - save_pr_number: - if: github.event_name == 'pull_request' - runs-on: ubuntu-latest - steps: - - name: Save PR number - env: - PR_NUMBER: ${{ github.event.number }} - run: | - echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 - with: - name: pr_number - path: pr_number.txt - retention-days: 1 + # If we add ccache back, don't forget to update cleanup-cache.yml + #save_pr_number: + # if: github.event_name == 'pull_request' + # runs-on: ubuntu-latest + # steps: + # - name: Save PR number + # env: + # PR_NUMBER: ${{ github.event.number }} + # run: | + # echo $PR_NUMBER > pr_number.txt + # - uses: actions/upload-artifact@v3 + # with: + # name: pr_number + # path: pr_number.txt + # retention-days: 1 From f817d776f544410c315cf6007564f88f6b16fc2b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Oct 2023 11:32:13 -0700 Subject: [PATCH 012/186] BCType::ext_dir_cc (#3581) Add a new BC type `BCType::ext_dir_cc` to address the issue in #3580. For cell conservation interpolation, the boundary location for `BCType::ext_dir` is on the domain face. Applications assuming boundary location at cell centers can now use `BCType::ext_dir_cc`. ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Docs/sphinx_documentation/source/Basics.rst | 17 ++++++++++++++++- Src/Amr/AMReX_StateDescriptor.cpp | 2 +- Src/Base/AMReX_BCUtil.H | 11 ++++++----- Src/Base/AMReX_BCUtil.cpp | 4 ++-- Src/Base/AMReX_BC_TYPES.H | 8 ++++---- Src/Base/AMReX_FILCC_1D.F90 | 2 +- Src/Base/AMReX_FILCC_2D.F90 | 2 +- Src/Base/AMReX_FILCC_3D.F90 | 2 +- Src/Base/AMReX_FilCC_C.cpp | 12 ++++++------ Src/Base/AMReX_bc_types.fi | 2 ++ Src/Base/AMReX_bc_types_mod.F90 | 1 + Src/Base/AMReX_filcc_mod.F90 | 12 ++++++------ .../Advection_F/Source/fillpatch_mod.F90 | 2 +- Tests/GPU/CNS/Source/CNS_bcfill.cpp | 2 +- 14 files changed, 49 insertions(+), 30 deletions(-) diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst index fd2d12ee0b8..1227b5f7b71 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -2537,12 +2537,27 @@ The basic idea behind physical boundary conditions is as follows: ext_dir "External Dirichlet". It is the user's responsibility to write a routine - to fill ghost cells (more details below). + to fill ghost cells (more details below). The boundary location + is on the domain face even when the data inside the domain are + cell-centered. + + ext_dir_cc + "External Dirichlet". It is the user's responsibility to write a routine + to fill ghost cells (more details below). The boundary location + is at the cell center of ghost cells outside the domain. foextrap "First Order Extrapolation" First order extrapolation from last cell in interior. + hoextrap + "High Order Extrapolation". 
The boundary location is on the domain + face even when the data inside the domain are cell-centered. + + hoextrapcc + "High Order Extrapolation" The boundary location is at the cell + center of ghost cells outside the domain. + reflect_even Reflection from interior cells with sign unchanged, :math:`q(-i) = q(i)`. diff --git a/Src/Amr/AMReX_StateDescriptor.cpp b/Src/Amr/AMReX_StateDescriptor.cpp index 161090d1a7f..4f1e3c3fe1b 100644 --- a/Src/Amr/AMReX_StateDescriptor.cpp +++ b/Src/Amr/AMReX_StateDescriptor.cpp @@ -19,7 +19,7 @@ StateDescriptor::bf_thread_safety (const int* /*lo*/,const int* /*hi*/, if (!bf_ext_dir_threadsafe) { bool has_ext_dir = false; for (int i=0; i<2*AMREX_SPACEDIM*ng && !has_ext_dir; ++i) { - has_ext_dir = bc[i]==BCType::ext_dir; + has_ext_dir = ((bc[i]==BCType::ext_dir) || (bc[i]==BCType::ext_dir_cc)); } if (has_ext_dir) { thread_safe = false; } } diff --git a/Src/Base/AMReX_BCUtil.H b/Src/Base/AMReX_BCUtil.H index dd4e814610c..3ebc53a0576 100644 --- a/Src/Base/AMReX_BCUtil.H +++ b/Src/Base/AMReX_BCUtil.H @@ -9,11 +9,12 @@ namespace amrex { // This is for filling cell-centered data outside physical domain - // (excluding periodic boundaries). It only fills - // BCType::foextrap, BCType::hoextrap, BCType::reflect_even, and - // BCType::reflect::odd. It does not fill BCType::ext_dir (i.e., - // external Dirichlet). If you have BCType::ext_dir, you can - // copy, paste and rename this function, and then modify it. + // (excluding periodic boundaries). It only fills BCType::foextrap, + // BCType::hoextrap, BCType::hoextrapcc, BCType::reflect_even, and + // BCType::reflect::odd. It does not fill BCType::ext_dir and + // BCType::ext_dir_cc (i.e., external Dirichlet). If you have + // BCType::ext_dir or BCType::ext_dir_cc, you can copy, paste and rename + // this function, and then modify it. void FillDomainBoundary (MultiFab& phi, const Geometry& geom, const Vector& bc); } diff --git a/Src/Base/AMReX_BCUtil.cpp b/Src/Base/AMReX_BCUtil.cpp index c0a645ba05c..dc5b4f4f10c 100644 --- a/Src/Base/AMReX_BCUtil.cpp +++ b/Src/Base/AMReX_BCUtil.cpp @@ -15,7 +15,7 @@ void dummy_cpu_fill_extdir (Box const& /*bx*/, Array4 const& /*dest*/, const BCRec* /*bcr*/, const int /*bcomp*/, const int /*orig_comp*/) { - // do something for external Dirichlet (BCType::ext_dir) if there are + // do something for external Dirichlet (BCType::ext_dir or BCType::ext_dir_cc) if there are } struct dummy_gpu_fill_extdir @@ -27,7 +27,7 @@ struct dummy_gpu_fill_extdir const BCRec* /*bcr*/, const int /*bcomp*/, const int /*orig_comp*/) const { - // do something for external Dirichlet (BCType::ext_dir) if there are + // do something for external Dirichlet (BCType::ext_dir or BCType::ext_dir_cc) if there are } }; diff --git a/Src/Base/AMReX_BC_TYPES.H b/Src/Base/AMReX_BC_TYPES.H index 872ac1eff5d..00fadd5e6d0 100644 --- a/Src/Base/AMReX_BC_TYPES.H +++ b/Src/Base/AMReX_BC_TYPES.H @@ -44,10 +44,8 @@ SYMMETRY | Un REFLECT_ODD | REFLECT_EVEN | REFLECT_EVEN | INT_DIR : data taken from other grids or interpolated -EXT_DIR : data specified on EDGE (FACE) of bndry for the linear solvers - for application codes, the location of the EXT_DIR data will - depend on how they do reconstruction, and may be edge or - cell-centered. 
+EXT_DIR : data specified on EDGE (FACE) +EXT_DIR_CC : data specified at cell center HOEXTRAP : higher order extrapolation to EDGE of bndry HOEXTRAPCC : linear extrapolation to CELL of bndry FOEXTRAP : first order extrapolation from last cell in interior @@ -74,6 +72,7 @@ enum mathematicalBndryTypes : int { ext_dir = 3, hoextrap = 4, hoextrapcc = 5, + ext_dir_cc = 6, user_1 = 1001, user_2 = 1002, user_3 = 1003 @@ -94,6 +93,7 @@ enum mathematicalBndryTypes : int { #define EXT_DIR 3 #define HOEXTRAP 4 #define HOEXTRAPCC 5 +#define EXT_DIR_CC 6 #define Interior 0 #define Inflow 1 diff --git a/Src/Base/AMReX_FILCC_1D.F90 b/Src/Base/AMReX_FILCC_1D.F90 index 873f67f5941..76e90a8d7a1 100644 --- a/Src/Base/AMReX_FILCC_1D.F90 +++ b/Src/Base/AMReX_FILCC_1D.F90 @@ -14,7 +14,7 @@ !! corner of q array !! \param bc => array of boundary flags bc(SPACEDIM,lo:hi) !! -!! NOTE: all corner as well as edge data is filled if not EXT_DIR +!! NOTE: all corner as well as edge data is filled if not EXT_DIR/EXT_DIR_CC ! ----------------------------------------------------------- #ifndef AMREX_XSDK diff --git a/Src/Base/AMReX_FILCC_2D.F90 b/Src/Base/AMReX_FILCC_2D.F90 index 89ef77d8384..c2ca8f7ddaf 100644 --- a/Src/Base/AMReX_FILCC_2D.F90 +++ b/Src/Base/AMReX_FILCC_2D.F90 @@ -16,7 +16,7 @@ !! corner of q array !! \param bc => array of boundary flags bc(SPACEDIM,lo:hi) !! -!! NOTE: all corner as well as edge data is filled if not EXT_DIR +!! NOTE: all corner as well as edge data is filled if not EXT_DIR/EXT_DIR_CC ! ----------------------------------------------------------- subroutine filcc(q,q_l1,q_l2,q_h1,q_h2,domlo,domhi,dx,xlo,bc) diff --git a/Src/Base/AMReX_FILCC_3D.F90 b/Src/Base/AMReX_FILCC_3D.F90 index aa3fec74ab0..59ce83d469d 100644 --- a/Src/Base/AMReX_FILCC_3D.F90 +++ b/Src/Base/AMReX_FILCC_3D.F90 @@ -6,7 +6,7 @@ !> This routine is intended to be a generic fill function !! for cell centered data. It knows how to exrapolate, !! and reflect data and can be used to supplement problem -!! specific fill functions (ie. EXT_DIR). +!! specific fill functions (ie. EXT_DIR/EXT_DIR_CC). !! !! \param q <= array to fill !! \param q_l1,q_l2,q_l3,q_h1,q_h2,q_h3 => index extent of q array diff --git a/Src/Base/AMReX_FilCC_C.cpp b/Src/Base/AMReX_FilCC_C.cpp index 7cdba486957..e2d8c6129fd 100644 --- a/Src/Base/AMReX_FilCC_C.cpp +++ b/Src/Base/AMReX_FilCC_C.cpp @@ -41,7 +41,7 @@ void fab_filcc (Box const& bx, Array4 const& qn, int ncomp, if (lo.x < ilo) { const int imin = lo.x; const int imax = ilo-1; - if (bc.lo(0) == BCType::ext_dir) { + if (bc.lo(0) == BCType::ext_dir || bc.lo(0) == BCType::ext_dir_cc) { // Do nothing. } else if (bc.lo(0) == BCType::foextrap) { for (int k = lo.z; k <= hi.z; ++k) { @@ -88,7 +88,7 @@ void fab_filcc (Box const& bx, Array4 const& qn, int ncomp, const int imin = ihi+1; const int imax = hi.x; - if (bc.hi(0) == BCType::ext_dir) { + if (bc.hi(0) == BCType::ext_dir || bc.hi(0) == BCType::ext_dir_cc) { // Do nothing. } else if (bc.hi(0) == BCType::foextrap) { for (int k = lo.z; k <= hi.z; ++k) { @@ -136,7 +136,7 @@ void fab_filcc (Box const& bx, Array4 const& qn, int ncomp, if (lo.y < jlo) { const int jmin = lo.y; const int jmax = jlo-1; - if (bc.lo(1) == BCType::ext_dir) { + if (bc.lo(1) == BCType::ext_dir || bc.lo(1) == BCType::ext_dir_cc) { // Do nothing. 
} else if (bc.lo(1) == BCType::foextrap) { for (int k = lo.z; k <= hi.z; ++k) { @@ -182,7 +182,7 @@ void fab_filcc (Box const& bx, Array4 const& qn, int ncomp, if (hi.y > jhi) { const int jmin = jhi+1; const int jmax = hi.y; - if (bc.hi(1) == BCType::ext_dir) { + if (bc.hi(1) == BCType::ext_dir || bc.hi(1) == BCType::ext_dir_cc) { // Do nothing. } else if (bc.hi(1) == BCType::foextrap) { for (int k = lo.z; k <= hi.z; ++k) { @@ -231,7 +231,7 @@ void fab_filcc (Box const& bx, Array4 const& qn, int ncomp, if (lo.z < klo) { const int kmin = lo.z; const int kmax = klo-1; - if (bc.lo(2) == BCType::ext_dir) { + if (bc.lo(2) == BCType::ext_dir || bc.lo(2) == BCType::ext_dir_cc) { // Do nothing. } else if (bc.lo(2) == BCType::foextrap) { for (int k = kmin; k <= kmax; ++k) { @@ -277,7 +277,7 @@ void fab_filcc (Box const& bx, Array4 const& qn, int ncomp, if (hi.z > khi) { const int kmin = khi+1; const int kmax = hi.z; - if (bc.hi(2) == BCType::ext_dir) { + if (bc.hi(2) == BCType::ext_dir || bc.hi(2) == BCType::ext_dir_cc) { // Do nothing. } else if (bc.hi(2) == BCType::foextrap) { for (int k = kmin; k <= kmax; ++k) { diff --git a/Src/Base/AMReX_bc_types.fi b/Src/Base/AMReX_bc_types.fi index 2f3340ccfc4..d929b589b3d 100644 --- a/Src/Base/AMReX_bc_types.fi +++ b/Src/Base/AMReX_bc_types.fi @@ -17,3 +17,5 @@ PARAMETER (HOEXTRAP=4) INTEGER HOEXTRAPCC PARAMETER (HOEXTRAPCC=5) + INTEGER EXT_DIR_CC + PARAMETER (EXT_DIR_CC=6) diff --git a/Src/Base/AMReX_bc_types_mod.F90 b/Src/Base/AMReX_bc_types_mod.F90 index c1c6f237ba8..5b8c58c255a 100644 --- a/Src/Base/AMReX_bc_types_mod.F90 +++ b/Src/Base/AMReX_bc_types_mod.F90 @@ -15,6 +15,7 @@ module amrex_bc_types_module integer, parameter, public :: amrex_bc_ext_dir = 3 integer, parameter, public :: amrex_bc_hoextrap = 4 integer, parameter, public :: amrex_bc_hoextrapcc = 5 + integer, parameter, public :: amrex_bc_ext_dir_cc = 6 integer, parameter, public :: amrex_bc_user_1 = 1001 integer, parameter, public :: amrex_bc_user_2 = 1002 integer, parameter, public :: amrex_bc_user_3 = 1003 diff --git a/Src/Base/AMReX_filcc_mod.F90 b/Src/Base/AMReX_filcc_mod.F90 index c1a2b2951f4..1cca9b84090 100644 --- a/Src/Base/AMReX_filcc_mod.F90 +++ b/Src/Base/AMReX_filcc_mod.F90 @@ -165,7 +165,7 @@ subroutine amrex_filccn(lo, hi, q, q_lo, q_hi, ncomp, domlo, domhi, dx, xlo, bc) imin = lo(1) imax = ilo-1 - if (bc(1,1,n) .eq. amrex_bc_ext_dir) then + if (bc(1,1,n) .eq. amrex_bc_ext_dir .or. bc(1,1,n) .eq. amrex_bc_ext_dir_cc) then ! Do nothing. @@ -237,7 +237,7 @@ subroutine amrex_filccn(lo, hi, q, q_lo, q_hi, ncomp, domlo, domhi, dx, xlo, bc) imin = ihi+1 imax = hi(1) - if (bc(1,2,n) .eq. amrex_bc_ext_dir) then + if (bc(1,2,n) .eq. amrex_bc_ext_dir .or. bc(1,2,n) .eq. amrex_bc_ext_dir_cc) then ! Do nothing. @@ -311,7 +311,7 @@ subroutine amrex_filccn(lo, hi, q, q_lo, q_hi, ncomp, domlo, domhi, dx, xlo, bc) jmin = lo(2) jmax = jlo-1 - if (bc(2,1,n) .eq. amrex_bc_ext_dir) then + if (bc(2,1,n) .eq. amrex_bc_ext_dir .or. bc(2,1,n) .eq. amrex_bc_ext_dir_cc) then ! Do nothing. @@ -383,7 +383,7 @@ subroutine amrex_filccn(lo, hi, q, q_lo, q_hi, ncomp, domlo, domhi, dx, xlo, bc) jmin = jhi+1 jmax = hi(2) - if (bc(2,2,n) .eq. amrex_bc_ext_dir) then + if (bc(2,2,n) .eq. amrex_bc_ext_dir .or. bc(2,2,n) .eq. amrex_bc_ext_dir_cc) then ! Do nothing. @@ -461,7 +461,7 @@ subroutine amrex_filccn(lo, hi, q, q_lo, q_hi, ncomp, domlo, domhi, dx, xlo, bc) kmin = lo(3) kmax = klo-1 - if (bc(3,1,n) .eq. amrex_bc_ext_dir) then + if (bc(3,1,n) .eq. amrex_bc_ext_dir .or. bc(3,1,n) .eq. 
amrex_bc_ext_dir_cc) then ! Do nothing. @@ -533,7 +533,7 @@ subroutine amrex_filccn(lo, hi, q, q_lo, q_hi, ncomp, domlo, domhi, dx, xlo, bc) kmin = khi+1 kmax = hi(3) - if (bc(3,2,n) .eq. amrex_bc_ext_dir) then + if (bc(3,2,n) .eq. amrex_bc_ext_dir .or. bc(3,2,n) .eq. amrex_bc_ext_dir_cc) then ! Do nothing. diff --git a/Tests/FortranInterface/Advection_F/Source/fillpatch_mod.F90 b/Tests/FortranInterface/Advection_F/Source/fillpatch_mod.F90 index f337de6e7ff..c79f2aa7b9a 100644 --- a/Tests/FortranInterface/Advection_F/Source/fillpatch_mod.F90 +++ b/Tests/FortranInterface/Advection_F/Source/fillpatch_mod.F90 @@ -90,7 +90,7 @@ subroutine fill_physbc (pmf, scomp, ncomp, time, pgeom) bind(c) geom%get_physical_location(plo), & ! physical location of lower left corner lo_bc, hi_bc) ! bc types for each component - ! amrex_filcc doesn't fill EXT_DIR (see amrex_bc_types_module for a list of bc types + ! amrex_filcc doesn't fill EXT_DIR/EXT_DIR_CC (see amrex_bc_types_module for a list of bc types ! In that case, the user needs to fill it. end if end do diff --git a/Tests/GPU/CNS/Source/CNS_bcfill.cpp b/Tests/GPU/CNS/Source/CNS_bcfill.cpp index 1c787e29a96..a75c7ebce88 100644 --- a/Tests/GPU/CNS/Source/CNS_bcfill.cpp +++ b/Tests/GPU/CNS/Source/CNS_bcfill.cpp @@ -14,7 +14,7 @@ struct CnsFillExtDir const BCRec* /*bcr*/, const int /*bcomp*/, const int /*orig_comp*/) const { - // do something for external Dirichlet (BCType::ext_dir) + // do something for external Dirichlet (BCType::ext_dir/BCType::ext_dir_cc) } }; From d39abbc396d0a9cc123245947b560954a3e9f16d Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Oct 2023 11:32:25 -0700 Subject: [PATCH 013/186] Add Bittree CI (#3577) Also fix some clang-tidy warnings. Note that the 3D test does not actually work as expected. It has zero cells. We will try to fix in the future, because it's out of the scope of this PR adding CI. 
--- .github/workflows/bittree.yml | 127 +++++++++++++++++++++++++++ .github/workflows/cleanup-cache.yml | 2 +- Src/Extern/Bittree/AMReX_Bittree.H | 67 +++++++------- Src/Extern/Bittree/AMReX_Bittree.cpp | 119 +++++++++++++------------ 4 files changed, 226 insertions(+), 89 deletions(-) create mode 100644 .github/workflows/bittree.yml diff --git a/.github/workflows/bittree.yml b/.github/workflows/bittree.yml new file mode 100644 index 00000000000..c12fbedc58f --- /dev/null +++ b/.github/workflows/bittree.yml @@ -0,0 +1,127 @@ +name: bittree + +on: [push, pull_request] + +concurrency: + group: ${{ github.ref }}-${{ github.head_ref }}-bittree + cancel-in-progress: true + +jobs: + bittree-2d: + name: Bittree 2D + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Dependencies + run: | + .github/workflows/dependencies/dependencies.sh + .github/workflows/dependencies/dependencies_clang-tidy.sh 15 + .github/workflows/dependencies/dependencies_ccache.sh + - name: Set Up Cache + uses: actions/cache@v3 + with: + path: ~/.cache/ccache + key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} + restore-keys: | + ccache-${{ github.workflow }}-${{ github.job }}-git- + - name: Check out Bittree + uses: actions/checkout@v4 + with: + repository: Flash-X/Bittree + path: bittree + - name: Build Bittree + run: | + cd ${{ github.workspace }}/bittree + python setup.py library --dim 2 --prefix ${{ github.workspace }}/libbittree + cd build + make -j2 + make install + - name: Build and Run Test + run: | + export CCACHE_COMPRESS=1 + export CCACHE_COMPRESSLEVEL=10 + export CCACHE_MAXSIZE=80M + export CCACHE_EXTRAFILES=${{ github.workspace }}/.clang-tidy + export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt + ccache -z + + export AMREX_BITTREE_HOME=${{ github.workspace }}/libbittree + cd ${{ github.workspace }}/Tests/Amr/Advection_AmrCore/Exec + make -j2 USE_MPI=TRUE USE_BITTREE=TRUE DIM=2 TEST=TRUE \ + CCACHE=ccache + mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs_bittree amr.plot_int=1000 + + ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt + make -j2 -f clang-tidy-ccache-misses.mak \ + CLANG_TIDY=clang-tidy-15 \ + CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" + + ccache -s + du -hs ~/.cache/ccache + + bittree-3d: + name: Bittree 3D + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Dependencies + run: | + .github/workflows/dependencies/dependencies.sh + .github/workflows/dependencies/dependencies_clang-tidy.sh 15 + .github/workflows/dependencies/dependencies_ccache.sh + - name: Set Up Cache + uses: actions/cache@v3 + with: + path: ~/.cache/ccache + key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} + restore-keys: | + ccache-${{ github.workflow }}-${{ github.job }}-git- + - name: Check out Bittree + uses: actions/checkout@v4 + with: + repository: Flash-X/Bittree + path: bittree + - name: Build Bittree + run: | + cd ${{ github.workspace }}/bittree + python setup.py library --dim 3 --prefix ${{ github.workspace }}/libbittree + cd build + make -j2 + make install + - name: Build and Run Test + run: | + export CCACHE_COMPRESS=1 + export CCACHE_COMPRESSLEVEL=10 + export CCACHE_MAXSIZE=80M + export CCACHE_EXTRAFILES=${{ github.workspace }}/.clang-tidy + export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt + ccache -z + + export AMREX_BITTREE_HOME=${{ github.workspace }}/libbittree + cd ${{ github.workspace 
}}/Tests/Amr/Advection_AmrCore/Exec + make -j2 USE_MPI=TRUE USE_BITTREE=TRUE DIM=3 TEST=TRUE BL_NO_FORT=TRUE\ + CCACHE=ccache + mpiexec -n 2 ./main3d.gnu.TEST.MPI.ex inputs_bittree max_step=10 + + ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt + make -j2 -f clang-tidy-ccache-misses.mak \ + CLANG_TIDY=clang-tidy-15 \ + CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" + + ccache -s + du -hs ~/.cache/ccache + + save_pr_number: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + steps: + - name: Save PR number + env: + PR_NUMBER: ${{ github.event.number }} + run: | + echo $PR_NUMBER > pr_number.txt + - uses: actions/upload-artifact@v3 + with: + name: pr_number + path: pr_number.txt + retention-days: 1 diff --git a/.github/workflows/cleanup-cache.yml b/.github/workflows/cleanup-cache.yml index 97db5730d25..f224ace0ef3 100644 --- a/.github/workflows/cleanup-cache.yml +++ b/.github/workflows/cleanup-cache.yml @@ -2,7 +2,7 @@ name: CleanUpCache on: workflow_run: - workflows: [LinuxClang, cuda, LinuxGcc, hip, Hypre, intel, macos, PETSc, SUNDIALS, CodeQL, smoke, apps] + workflows: [bittree, LinuxClang, cuda, LinuxGcc, hip, Hypre, intel, macos, PETSc, SUNDIALS, CodeQL, smoke, apps] types: - completed diff --git a/Src/Extern/Bittree/AMReX_Bittree.H b/Src/Extern/Bittree/AMReX_Bittree.H index 744cd3b0b58..54a046be720 100644 --- a/Src/Extern/Bittree/AMReX_Bittree.H +++ b/Src/Extern/Bittree/AMReX_Bittree.H @@ -22,47 +22,46 @@ amr.use_bittree = true class btUnit { // Functions used in AmrMesh public: - static int btRefine(bittree::BittreeAmr* const mesh, - std::vector& btTags, - int max_crse, int lbase, - Vector& grids, Vector& dmap, - MPI_Comm comm); - static void btCalculateGrids(bittree::BittreeAmr* const mesh, - int lbase, - int& new_finest, - Vector& new_grids, - Vector const& max_grid_size); - static void btCalculateLevel(bittree::BittreeAmr* const mesh, - int lev, - BoxArray& ba, - IntVect const& max_grid_size); + static int btRefine (bittree::BittreeAmr* mesh, + std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, + MPI_Comm comm); + static void btCalculateGrids (bittree::BittreeAmr* mesh, + int lbase, + int& new_finest, + Vector& new_grids, + Vector const& max_grid_size); + static void btCalculateLevel (bittree::BittreeAmr* mesh, + int lev, + BoxArray& ba, + IntVect const& max_grid_size); // Utils - public: - static int getBitid(bittree::BittreeAmr* const mesh, bool updated, - int lev, int idx_on_lev); - static int getIndex(bittree::BittreeAmr* const mesh, bool updated, - int lev, int bitid); + static int getBitid (bittree::BittreeAmr* mesh, bool updated, + int lev, int idx_on_lev); + static int getIndex (bittree::BittreeAmr* mesh, bool updated, + int lev, int bitid); // Functions to implement strict octree logic private: - static void btCheckRefine(bittree::BittreeAmr* const mesh, - std::vector& btTags, - int max_crse, int lbase, - Vector& grids, Vector& dmap, - MPI_Comm comm); + static void btCheckRefine (bittree::BittreeAmr* mesh, + std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, + MPI_Comm comm); - static void btCheckDerefine(bittree::BittreeAmr* const mesh, - std::vector& btTags, - int max_crse, int lbase, - Vector& grids, Vector& dmap, - MPI_Comm comm); + static void btCheckDerefine (bittree::BittreeAmr* mesh, + std::vector& btTags, + int max_crse, int lbase, + Vector& grids, Vector& dmap, + MPI_Comm comm); // Utility Functions - 
static bool checkNeighborsRefine(bittree::BittreeAmr* const mesh, - bittree::MortonTree::Block b); - static std::vector neighIntCoords(bittree::BittreeAmr* const mesh, - unsigned lev, unsigned* lcoord, - int* gCell); + static bool checkNeighborsRefine (bittree::BittreeAmr* mesh, + bittree::MortonTree::Block b); + static std::vector neighIntCoords (bittree::BittreeAmr* mesh, + unsigned lev, unsigned const* lcoord, + int const* gCell); public: // Represents whether domain has periodic BC in each direction diff --git a/Src/Extern/Bittree/AMReX_Bittree.cpp b/Src/Extern/Bittree/AMReX_Bittree.cpp index 543b5688208..51bbe199278 100644 --- a/Src/Extern/Bittree/AMReX_Bittree.cpp +++ b/Src/Extern/Bittree/AMReX_Bittree.cpp @@ -6,9 +6,9 @@ using namespace bittree; namespace amrex { -static constexpr int K1D = unsigned(AMREX_SPACEDIM>=1); -static constexpr int K2D = unsigned(AMREX_SPACEDIM>=2); -static constexpr int K3D = unsigned(AMREX_SPACEDIM>=3); +static constexpr auto K1D = int(AMREX_SPACEDIM>=1); +static constexpr auto K2D = int(AMREX_SPACEDIM>=2); +static constexpr auto K3D = int(AMREX_SPACEDIM>=3); bool btUnit::bcPeriodic[AMREX_SPACEDIM]; @@ -30,10 +30,10 @@ The functions here are called in the BT version of MakeNewGrids which has three * This makes use of BT library functions and as well as routines adapted * from Flash-X that enforce Octree nesting. */ -int btUnit::btRefine( BittreeAmr* const mesh, std::vector& btTags, +int btUnit::btRefine (BittreeAmr* const mesh, std::vector& btTags, int max_crse, int lbase, - Vector& grids, Vector& dmap, MPI_Comm comm) { - + Vector& grids, Vector& dmap, MPI_Comm comm) +{ BL_PROFILE("Bittree-btRefine"); // Tree before refinement. With only one rank, lnblocks = nblocks. @@ -79,15 +79,16 @@ int btUnit::btRefine( BittreeAmr* const mesh, std::vector& btTags, /** Creates new box arrays to match the new Bittree mesh. */ -void btUnit::btCalculateGrids(BittreeAmr* const mesh, int lbase, - int& new_finest, - Vector& new_grids, - Vector const& max_grid_size) { +void btUnit::btCalculateGrids (BittreeAmr* const mesh, int lbase, + int& new_finest, + Vector& new_grids, + Vector const& max_grid_size) +{ BL_PROFILE("Bittree-btCalculateGrids"); auto tree1 = mesh->getTree(true); - int nlevs = tree1->levels(); - new_finest = nlevs - 1; + auto nlevs = tree1->levels(); + new_finest = int(nlevs - 1); //--Calculate the new grid layout and distribution map based on Bittree for(int lev=lbase; lev<=new_finest; ++lev) { @@ -98,21 +99,22 @@ void btUnit::btCalculateGrids(BittreeAmr* const mesh, int lbase, /** Creates a box array based on Bittree. */ -void btUnit::btCalculateLevel(BittreeAmr* const mesh, int lev, - BoxArray& ba, - IntVect const& max_grid_size) { +void btUnit::btCalculateLevel (BittreeAmr* const mesh, int lev, + BoxArray& ba, + IntVect const& max_grid_size) +{ auto tree1 = mesh->getTree(true); //Bittree has its own indices for blocks which I call bitid; get //the range of bitids for the level being made. Bitid range is //contiguous for each level. 
- int id0 = tree1->level_id0(lev); - int id1 = tree1->level_id1(lev); - int nblocks = tree1->level_blocks(lev); + auto id0 = tree1->level_id0(lev); + auto id1 = tree1->level_id1(lev); + // int nblocks = tree1->level_blocks(lev); BoxList bl; - for(int i=id0; ilocate(i); @@ -132,14 +134,16 @@ void btUnit::btCalculateLevel(BittreeAmr* const mesh, int lev, ba = BoxArray(bl); } -int btUnit::getBitid(BittreeAmr* const mesh, bool updated, - int lev, int idx_on_lev) { - return idx_on_lev + mesh->getTree(updated)->level_id0(lev); +int btUnit::getBitid (BittreeAmr* const mesh, bool updated, + int lev, int idx_on_lev) +{ + return idx_on_lev + int(mesh->getTree(updated)->level_id0(lev)); } -int btUnit::getIndex(BittreeAmr* const mesh, bool updated, - int lev, int bitid) { - return bitid - mesh->getTree(updated)->level_id0(lev); +int btUnit::getIndex (BittreeAmr* const mesh, bool updated, + int lev, int bitid) +{ + return bitid - int(mesh->getTree(updated)->level_id0(lev)); } @@ -152,10 +156,11 @@ int btUnit::getIndex(BittreeAmr* const mesh, bool updated, * to a strict octree structure with no more than one level difference * between surrounding leaf blocks. */ -void btUnit::btCheckRefine(BittreeAmr* const mesh, std::vector& btTags, - int max_crse, int lbase, - Vector& grids, Vector& dmap, MPI_Comm comm) { - +void btUnit::btCheckRefine (BittreeAmr* const mesh, std::vector& btTags, + int max_crse, int lbase, + Vector& grids, + Vector& dmap, MPI_Comm comm) +{ BL_PROFILE("Bittree-btCheckRefine"); // Tree before refinement. @@ -217,10 +222,11 @@ void btUnit::btCheckRefine(BittreeAmr* const mesh, std::vector& btTags, * to a strict octree structure with no more than one level difference * between surrounding leaf blocks. */ -void btUnit::btCheckDerefine(BittreeAmr* const mesh, std::vector& btTags, - int max_crse, int lbase, - Vector& grids, Vector& dmap, MPI_Comm comm) { - +void btUnit::btCheckDerefine (BittreeAmr* const mesh, std::vector& btTags, + int max_crse, int lbase, + Vector& grids, + Vector& dmap, MPI_Comm comm) +{ BL_PROFILE("Bittree-btCheckDerefine"); // Tree before refinement. With only one rank, lnblocks = nblocks. @@ -280,8 +286,8 @@ void btUnit::btCheckDerefine(BittreeAmr* const mesh, std::vector& btTags, // Check all neighbors to see if their adjacent children are parents or marked for refinement. -bool btUnit::checkNeighborsRefine(BittreeAmr* const mesh, MortonTree::Block b) { - +bool btUnit::checkNeighborsRefine (BittreeAmr* const mesh, MortonTree::Block b) +{ BL_PROFILE("Bittree-checkNeighborsRefine"); auto tree0 = mesh->getTree(); @@ -339,36 +345,41 @@ bool btUnit::checkNeighborsRefine(BittreeAmr* const mesh, MortonTree::Block b) { /** Calculate integer coordinates of neighbors, taking into account BCs. * Currently assuming Periodic in all directions. */ -std::vector btUnit::neighIntCoords(BittreeAmr* const mesh, - unsigned lev, unsigned* lcoord, int* gCell) { +std::vector btUnit::neighIntCoords (BittreeAmr* const mesh, + unsigned lev, unsigned const* lcoord, + int const* gCell) +{ auto tree = mesh->getTree(); std::vector neighCoord(AMREX_SPACEDIM); //--Calculate integer coordinates of neighbor in direction - for(unsigned d=0;d(lcoord[d]) + gCell[d]; + for(unsigned d=0;d(lcoord[d]) + gCell[d]; + } //--Make sure not out-of-bounds. 
If periodic BCs, apply modulo std::vector maxcoord(AMREX_SPACEDIM); - for(unsigned d=0;d(tree->top_size(d)) << lev; - for(unsigned d=0;d(tree->top_size(d)) << lev; + } - if (neighCoord[d] >= maxcoord[d]) { - if ( bcPeriodic[d] == true ) - neighCoord[d] = neighCoord[d] - maxcoord[d]; - else - neighCoord[d] = -1; - } + for(unsigned d=0;d= maxcoord[d]) { + if ( bcPeriodic[d] == true ) { + neighCoord[d] = neighCoord[d] - maxcoord[d]; + } else { + neighCoord[d] = -1; + } + } } return neighCoord; From 01b30b79186b329b1b344463af9505058ae8b3e4 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Mon, 9 Oct 2023 10:20:06 -0700 Subject: [PATCH 014/186] =?UTF-8?q?change=20MaxCnt=20from=204=20to=20max(4?= =?UTF-8?q?,max=5Flevel+1)=20for=20how=20many=20iterations=20we=E2=80=A6?= =?UTF-8?q?=20(#3588)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … allow in creation of the initial grid hierarchy --- Src/Amr/AMReX_Amr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Amr/AMReX_Amr.cpp b/Src/Amr/AMReX_Amr.cpp index 0b1dfb6f886..c3e023e04a8 100644 --- a/Src/Amr/AMReX_Amr.cpp +++ b/Src/Amr/AMReX_Amr.cpp @@ -3085,7 +3085,7 @@ Amr::bldFineLevels (Real strt_time) { bool grids_the_same; - const int MaxCnt = 4; + const int MaxCnt = std::max(4, max_level+1); int count = 0; From 77d4d1fe5ce68a1e71095093ce856e061f24fc07 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 9 Oct 2023 10:21:25 -0700 Subject: [PATCH 015/186] RealVect Static: Export (#3589) ## Summary Export global symbols for Windows. Seen in WarpX on conda-forge: ``` lld-link: warning: ignoring unknown argument '-lpthreads' lld-link: error: undefined symbol: public: static class amrex::RealVect const amrex::RealVect::Zero >>> referenced by libwarpx.rz.NOMPI.OMP.DP.PDP.OPMD.PSATD.QED.lib(SpectralKSpaceRZ.cpp.obj):(public: __cdecl SpectralKSpaceRZ::SpectralKSpaceRZ(class amrex::BoxArray const &, class amrex::DistributionMapping const &, class amrex::RealVect)) ``` ## Additional background https://github.com/conda-forge/warpx-feedstock/pull/72 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_RealVect.H | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Src/Base/AMReX_RealVect.H b/Src/Base/AMReX_RealVect.H index 83cc747b030..635d21927f6 100644 --- a/Src/Base/AMReX_RealVect.H +++ b/Src/Base/AMReX_RealVect.H @@ -521,13 +521,13 @@ public: /** This is a RealVect all of whose components are equal to zero. */ - static const RealVect Zero; + static AMREX_EXPORT const RealVect Zero; /// /** This is a RealVect all of whose components are equal to one. */ - static const RealVect Unit; + static AMREX_EXPORT const RealVect Unit; /*@}*/ From ecaa14456e3f5916e6ef2b08095d61f61f19eeae Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 11 Oct 2023 16:22:23 -0700 Subject: [PATCH 016/186] Fix a memory "leak" in VisMF's persistent streams (#3592) ## Summary VisMF uses a std::map to keep track of the streams it opens. Each PersistentIFStream object also has a vector used as IO buffer. When a stream is closed, we need to free to memory used by the buffer. The issue was it used std::vector::clear(), which only changes size() without reducing capacity() at all. 
So the memory was never freed. The issue is now fixed by std::vector::swap. ## Additional background Thank Simon Guichandut (@simonguichandut) for reporting this! Thank Eric Johnson (@yut23) for pinning down the location of the leak with the help of the Massif heap profiler! ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_VisMF.cpp | 2 +- Src/Base/AMReX_VisMFBuffer.H | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_VisMF.cpp b/Src/Base/AMReX_VisMF.cpp index 52efe827460..912be0b9b99 100644 --- a/Src/Base/AMReX_VisMF.cpp +++ b/Src/Base/AMReX_VisMF.cpp @@ -2202,7 +2202,7 @@ void VisMF::CloseStream(const std::string &fileName, bool forceClose) pifs.pstr = nullptr; pifs.isOpen = false; } - pifs.ioBuffer.clear(); + VisMFBuffer::ClearBuffer(pifs.ioBuffer); } diff --git a/Src/Base/AMReX_VisMFBuffer.H b/Src/Base/AMReX_VisMFBuffer.H index 843e5713f34..f095dbbebf2 100644 --- a/Src/Base/AMReX_VisMFBuffer.H +++ b/Src/Base/AMReX_VisMFBuffer.H @@ -29,6 +29,10 @@ public: ioBufferSize = iobuffersize; } + static void ClearBuffer (IO_Buffer& buf) { + IO_Buffer().swap(buf); + } + protected: static AMREX_EXPORT Long ioBufferSize; //!< ---- the settable buffer size From 7ee29121ed70d7e255ad98a8b1690d345cb4fb33 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 16 Oct 2023 12:52:06 -0700 Subject: [PATCH 017/186] Fix Assertion in MLEBNodeFDLaplacian (#3594) ## Summary MLEBNodeFDLaplacian is never singular. MLNodeLinOp::m_is_bottom_singular should be ignored. This bug caused an assertion error (although it should not affect result if there is no assertion). Also to prevent this from happening again, m_is_bottom_singular has been made private. 
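To make the encapsulation change concrete, here is a minimal sketch with simplified stand-in classes (`NodeLinOpSketch`, `NeverSingularOpSketch` and the helper function are illustrative, not the full `MLNodeLinOp` interface; only `isBottomSingular()` and `AMREX_ASSERT` are taken from the patch below). With the flag private, a derived operator can only query it through the accessor, so an operator that is never singular cannot read or flip the member directly by mistake:

```
#include <AMReX_BLassert.H>

// Illustrative sketch only -- not the real MLNodeLinOp class hierarchy.
class NodeLinOpSketch {
public:
    [[nodiscard]] bool isBottomSingular () const { return m_is_bottom_singular; }
private:
    bool m_is_bottom_singular = false;   // was a protected member before this patch
};

class NeverSingularOpSketch : public NodeLinOpSketch {
public:
    // replaces the old dead code path that honored the flag
    void prepareForSolve () const { AMREX_ASSERT(!isBottomSingular()); }
};
```
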
## Additional background https://github.com/ECP-WarpX/WarpX/pull/4363 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 15 +-------------- .../MLMG/AMReX_MLNodeLaplacian_sten.cpp | 2 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H | 4 +++- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index 523ed4cb44d..ad0fc6f1200 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -284,20 +284,7 @@ MLEBNodeFDLaplacian::prepareForSolve () }); } - if (m_is_bottom_singular) - { - int amrlev = 0; - int mglev = 0; - auto const& dotmasks = m_coarse_dot_mask.arrays(); - auto const& dirmasks = m_dirichlet_mask[amrlev][mglev]->const_arrays(); - amrex::ParallelFor(m_coarse_dot_mask, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - if (dirmasks[box_no](i,j,k)) { - dotmasks[box_no](i,j,k) = Real(0.); - } - }); - } + AMREX_ASSERT(!isBottomSingular()); Gpu::streamSynchronize(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sten.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sten.cpp index 602f893b00d..db83ee8c979 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sten.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sten.cpp @@ -344,7 +344,7 @@ MLNodeLaplacian::buildStencil () }); } - if (m_is_bottom_singular) + if (isBottomSingular()) { int amrlev = 0; int mglev = 0; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index 424d22f60c3..89ee258a3aa 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -156,7 +156,6 @@ protected: CoarseningStrategy m_coarsening_strategy = CoarseningStrategy::Sigma; #endif - bool m_is_bottom_singular = false; bool m_masks_built = false; bool m_overset_dirichlet_mask = false; #ifdef AMREX_USE_GPU @@ -165,6 +164,9 @@ protected: int m_smooth_num_sweeps = 2; #endif mutable bool m_in_solution_mode = true; + +private: + bool m_is_bottom_singular = false; }; } From 83bc468e2434855952b0ba010bac9e8949a0c7a0 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 19 Oct 2023 12:16:15 -0700 Subject: [PATCH 018/186] Avoid std::filesystem (#3602) It has caused a lot of issues on systems with a default gcc <= 8. PR #3523 was trying to the issue. But unfortunately it does not always work. So we are going to simply avoid std::filesystem. 
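For context, a minimal sketch of the string-only alternative (the helper name `base_name_sketch` is made up here; the patch below actually switches to the existing `VisMF::BaseName`). Peeling the directories off a plotfile path needs nothing more than `std::string`, so `<filesystem>` -- and the libstdc++fs link problems on systems with a default gcc <= 8 -- can be avoided:

```
#include <string>

// Sketch only: take everything after the last '/' in a plotfile path.
std::string base_name_sketch (std::string path)
{
    while (!path.empty() && path.back() == '/') { path.pop_back(); }  // tolerate "plt00010/"
    auto pos = path.find_last_of('/');
    return (pos == std::string::npos) ? path : path.substr(pos+1);
}
// e.g. base_name_sketch("run1/plt00010") == "plt00010",
//      so the output prefix becomes "grad.plt00010".
```
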
--- Tools/Plotfile/fgradient.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tools/Plotfile/fgradient.cpp b/Tools/Plotfile/fgradient.cpp index 60bb1371585..1de840f68e2 100644 --- a/Tools/Plotfile/fgradient.cpp +++ b/Tools/Plotfile/fgradient.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -129,7 +128,7 @@ void main_main() } if (outfile.empty()) { - outfile = "grad."+std::filesystem::path(pltfile).filename().string(); + outfile = "grad."+VisMF::BaseName(pltfile); } PlotFileData pf(pltfile); From 88d08113d7ba5ad6a75e0f47b560b4ccd00084ec Mon Sep 17 00:00:00 2001 From: Jon Rood Date: Thu, 19 Oct 2023 13:17:14 -0600 Subject: [PATCH 019/186] Revert "Add ability for GCC 8 in CMake to build fgradient which uses std::filesystem" (#3601) Reverts AMReX-Codes/amrex#3523 --- Tools/CMake/FindFilesystem.cmake | 244 ------------------------------- Tools/Plotfile/CMakeLists.txt | 8 - 2 files changed, 252 deletions(-) delete mode 100644 Tools/CMake/FindFilesystem.cmake diff --git a/Tools/CMake/FindFilesystem.cmake b/Tools/CMake/FindFilesystem.cmake deleted file mode 100644 index fe9a0aea604..00000000000 --- a/Tools/CMake/FindFilesystem.cmake +++ /dev/null @@ -1,244 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. - -# This is copied from: -# https://github.com/vector-of-bool/CMakeCM/blob/master/modules/FindFilesystem.cmake - -#[=======================================================================[.rst: - -FindFilesystem -############## - -This module supports the C++17 standard library's filesystem utilities. Use the -:imp-target:`std::filesystem` imported target to - -Options -******* - -The ``COMPONENTS`` argument to this module supports the following values: - -.. find-component:: Experimental - :name: fs.Experimental - - Allows the module to find the "experimental" Filesystem TS version of the - Filesystem library. This is the library that should be used with the - ``std::experimental::filesystem`` namespace. - -.. find-component:: Final - :name: fs.Final - - Finds the final C++17 standard version of the filesystem library. - -If no components are provided, behaves as if the -:find-component:`fs.Final` component was specified. - -If both :find-component:`fs.Experimental` and :find-component:`fs.Final` are -provided, first looks for ``Final``, and falls back to ``Experimental`` in case -of failure. If ``Final`` is found, :imp-target:`std::filesystem` and all -:ref:`variables ` will refer to the ``Final`` version. - - -Imported Targets -**************** - -.. imp-target:: std::filesystem - - The ``std::filesystem`` imported target is defined when any requested - version of the C++ filesystem library has been found, whether it is - *Experimental* or *Final*. - - If no version of the filesystem library is available, this target will not - be defined. - - .. note:: - This target has ``cxx_std_17`` as an ``INTERFACE`` - :ref:`compile language standard feature `. Linking - to this target will automatically enable C++17 if no later standard - version is already required on the linking target. - - -.. _fs.variables: - -Variables -********* - -.. variable:: CXX_FILESYSTEM_IS_EXPERIMENTAL - - Set to ``TRUE`` when the :find-component:`fs.Experimental` version of C++ - filesystem library was found, otherwise ``FALSE``. - -.. variable:: CXX_FILESYSTEM_HAVE_FS - - Set to ``TRUE`` when a filesystem header was found. - -.. 
variable:: CXX_FILESYSTEM_HEADER - - Set to either ``filesystem`` or ``experimental/filesystem`` depending on - whether :find-component:`fs.Final` or :find-component:`fs.Experimental` was - found. - -.. variable:: CXX_FILESYSTEM_NAMESPACE - - Set to either ``std::filesystem`` or ``std::experimental::filesystem`` - depending on whether :find-component:`fs.Final` or - :find-component:`fs.Experimental` was found. - - -Examples -******** - -Using `find_package(Filesystem)` with no component arguments: - -.. code-block:: cmake - - find_package(Filesystem REQUIRED) - - add_executable(my-program main.cpp) - target_link_libraries(my-program PRIVATE std::filesystem) - - -#]=======================================================================] - - -if(TARGET std::filesystem) - # This module has already been processed. Don't do it again. - return() -endif() - -cmake_policy(PUSH) -if(POLICY CMP0067) - # pass CMAKE_CXX_STANDARD to check_cxx_source_compiles() - # has to appear before including CheckCXXSourceCompiles module - cmake_policy(SET CMP0067 NEW) -endif() - -include(CMakePushCheckState) -include(CheckIncludeFileCXX) -include(CheckCXXSourceCompiles) - -cmake_push_check_state() - -set(CMAKE_REQUIRED_QUIET ${Filesystem_FIND_QUIETLY}) - -# All of our tests required C++17 or later -set(CMAKE_CXX_STANDARD 17) - -# Normalize and check the component list we were given -set(want_components ${Filesystem_FIND_COMPONENTS}) -if(Filesystem_FIND_COMPONENTS STREQUAL "") - set(want_components Final) -endif() - -# Warn on any unrecognized components -set(extra_components ${want_components}) -list(REMOVE_ITEM extra_components Final Experimental) -foreach(component IN LISTS extra_components) - message(WARNING "Extraneous find_package component for Filesystem: ${component}") -endforeach() - -# Detect which of Experimental and Final we should look for -set(find_experimental TRUE) -set(find_final TRUE) -if(NOT "Final" IN_LIST want_components) - set(find_final FALSE) -endif() -if(NOT "Experimental" IN_LIST want_components) - set(find_experimental FALSE) -endif() - -if(find_final) - check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER) - mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER) - if(_CXX_FILESYSTEM_HAVE_HEADER) - # We found the non-experimental header. Don't bother looking for the - # experimental one. - set(find_experimental FALSE) - endif() -else() - set(_CXX_FILESYSTEM_HAVE_HEADER FALSE) -endif() - -if(find_experimental) - check_include_file_cxx("experimental/filesystem" _CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) - mark_as_advanced(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) -else() - set(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER FALSE) -endif() - -if(_CXX_FILESYSTEM_HAVE_HEADER) - set(_have_fs TRUE) - set(_fs_header filesystem) - set(_fs_namespace std::filesystem) -elseif(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) - set(_have_fs TRUE) - set(_fs_header experimental/filesystem) - set(_fs_namespace std::experimental::filesystem) -else() - set(_have_fs FALSE) -endif() - -set(CXX_FILESYSTEM_HAVE_FS ${_have_fs} CACHE BOOL "TRUE if we have the C++ filesystem headers") -set(CXX_FILESYSTEM_HEADER ${_fs_header} CACHE STRING "The header that should be included to obtain the filesystem APIs") -set(CXX_FILESYSTEM_NAMESPACE ${_fs_namespace} CACHE STRING "The C++ namespace that contains the filesystem APIs") - -set(_found FALSE) - -if(CXX_FILESYSTEM_HAVE_FS) - # We have some filesystem library available. 
Do link checks - string(CONFIGURE [[ - #include <@CXX_FILESYSTEM_HEADER@> - - int main() { - auto cwd = @CXX_FILESYSTEM_NAMESPACE@::current_path(); - return static_cast(cwd.string().size()); - } - ]] code @ONLY) - - # Try to compile a simple filesystem program without any linker flags - check_cxx_source_compiles("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED) - - set(can_link ${CXX_FILESYSTEM_NO_LINK_NEEDED}) - - if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED) - set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES}) - # Add the libstdc++ flag - set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs) - check_cxx_source_compiles("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED) - set(can_link ${CXX_FILESYSTEM_STDCPPFS_NEEDED}) - if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED) - # Try the libc++ flag - set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs) - check_cxx_source_compiles("${code}" CXX_FILESYSTEM_CPPFS_NEEDED) - set(can_link ${CXX_FILESYSTEM_CPPFS_NEEDED}) - endif() - endif() - - if(can_link) - add_library(std::filesystem INTERFACE IMPORTED) - target_compile_features(std::filesystem INTERFACE cxx_std_17) - set(_found TRUE) - if(CXX_FILESYSTEM_NO_LINK_NEEDED) - # on certain linux distros we have a version of libstdc++ which has the final code for c++17 fs in the - # libstdc++.so.*. BUT when compiling with g++ < 9, we MUST still link with libstdc++fs.a - # libc++ should not suffer from this issue, so, in theory we should be fine with only checking for - # GCC's libstdc++ - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0.0")) - target_link_libraries(std::filesystem INTERFACE -lstdc++fs) - endif() - elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED) - target_link_libraries(std::filesystem INTERFACE -lstdc++fs) - elseif(CXX_FILESYSTEM_CPPFS_NEEDED) - target_link_libraries(std::filesystem INTERFACE -lc++fs) - endif() - endif() -endif() - -cmake_pop_check_state() - -set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can compile and link a program using std::filesystem" FORCE) - -if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND) - message(FATAL_ERROR "Cannot Compile simple program using std::filesystem") -endif() - -cmake_policy(POP) diff --git a/Tools/Plotfile/CMakeLists.txt b/Tools/Plotfile/CMakeLists.txt index 464dad40bb1..0efc55c9a16 100644 --- a/Tools/Plotfile/CMakeLists.txt +++ b/Tools/Plotfile/CMakeLists.txt @@ -37,11 +37,3 @@ if (AMReX_CUDA) set_source_files_properties(AMReX_PPMUtil.cpp PROPERTIES LANGUAGE CUDA) target_compile_features(fsnapshot PUBLIC cxx_std_17) endif() - -# fgradient uses std::filesystem and GCC < 9 needs special treatment for std::filesystem -if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0.0")) - find_package(Filesystem) - if (CXX_FILESYSTEM_HAVE_FS) - target_link_libraries(fgradient PRIVATE std::filesystem) - endif() -endif() From c7b1ac7b836369daabc1dc00e3a3447e8dd56384 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 19 Oct 2023 12:32:40 -0700 Subject: [PATCH 020/186] Install Move Tools to `shared/amrex` (#3599) ## Summary Packaging AMReX for Conda, we realized that we break with common conventions on Unix systems by installing a top-level `Tools/` directory. This moves its content in both the build directory and the install destination to `share/amrex/` and `/AMReXCMakeModules/`, respectively. 
## Additional background https://github.com/conda-forge/staged-recipes/pull/24294 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Weiqun Zhang --- Tools/CMake/AMReXBuildInfo.cmake | 29 +++++++++++++++++++------ Tools/CMake/AMReXInstallHelpers.cmake | 31 ++++++++++++++++++--------- Tools/CMake/AMReXTypecheck.cmake | 8 +++++-- 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/Tools/CMake/AMReXBuildInfo.cmake b/Tools/CMake/AMReXBuildInfo.cmake index 16795110658..0d695fad9db 100644 --- a/Tools/CMake/AMReXBuildInfo.cmake +++ b/Tools/CMake/AMReXBuildInfo.cmake @@ -38,15 +38,30 @@ include(AMReXTargetHelpers) # # Set paths # -string(REPLACE "/Tools/CMake" "" AMREX_TOP_DIR ${CMAKE_CURRENT_LIST_DIR}) -set( AMREX_TOP_DIR ${AMREX_TOP_DIR} CACHE INTERNAL "Top level AMReX directory") - -set( AMREX_BUILDINFO_IFILE ${CMAKE_CURRENT_LIST_DIR}/AMReX_buildInfo.cpp.in +if (AMReX_FOUND) + # AMReX is pre-installed and used as a library + string(REPLACE "/lib/cmake/AMReX/AMReXCMakeModules" "" AMREX_TOP_DIR_DEFAULT + ${CMAKE_CURRENT_LIST_DIR}) +else () + # this is a superbuild + string(REPLACE "/Tools/CMake" "" AMREX_TOP_DIR_DEFAULT + ${CMAKE_CURRENT_LIST_DIR}) +endif () +set(AMREX_TOP_DIR "${AMREX_TOP_DIR_DEFAULT}" CACHE INTERNAL "Top level AMReX directory") + +if (AMReX_FOUND) + # AMReX is pre-installed and used as a library + set(AMREX_C_SCRIPTS_DIR "${AMREX_TOP_DIR}/share/amrex/C_scripts" + CACHE INTERNAL "Path to AMReX' C_scripts dir") +else () + # this is a superbuild + set(AMREX_C_SCRIPTS_DIR "${AMREX_TOP_DIR}/Tools/C_scripts" + CACHE INTERNAL "Path to AMReX' C_scripts dir") +endif () + +set(AMREX_BUILDINFO_IFILE ${CMAKE_CURRENT_LIST_DIR}/AMReX_buildInfo.cpp.in CACHE INTERNAL "Full path and name of AMReX_buildInfo.cpp.in") -set( AMREX_C_SCRIPTS_DIR "${AMREX_TOP_DIR}/Tools/C_scripts" - CACHE INTERNAL "Path to AMReX' C_scripts dir") - set(AMREX_BUILD_DATETIME "" CACHE STRING "User defined build date and time. 
Set ONLY for reproducibly built binary distributions") diff --git a/Tools/CMake/AMReXInstallHelpers.cmake b/Tools/CMake/AMReXInstallHelpers.cmake index 0720efb4cf9..a01cd63301d 100644 --- a/Tools/CMake/AMReXInstallHelpers.cmake +++ b/Tools/CMake/AMReXInstallHelpers.cmake @@ -26,7 +26,7 @@ function (install_amrex_targets) else() set(CMAKE_FILES_DIR lib/cmake/AMReX) endif() - set(MODULE_PATH Tools/CMake) # Relative path to top level installation/build-tree + set(MODULE_PATH ${CMAKE_FILES_DIR}/AMReXCMakeModules) # Relative path to top level installation/build-tree # Write Config file -- this is designed to work for both install and build trees configure_package_config_file(${AMREX_CMAKE_MODULES_PATH}/AMReXConfig.cmake.in @@ -60,7 +60,7 @@ function (install_amrex_targets) install( EXPORT AMReXTargets NAMESPACE AMReX:: - DESTINATION lib/cmake/AMReX ) + DESTINATION ${CMAKE_FILES_DIR} ) # # alias: last element will be legacy target @@ -88,11 +88,17 @@ function (install_amrex_targets) # Install Tools directory install( DIRECTORY - ${PROJECT_SOURCE_DIR}/Tools/CMake - ${PROJECT_SOURCE_DIR}/Tools/C_scripts - ${PROJECT_SOURCE_DIR}/Tools/typechecker + ${PROJECT_SOURCE_DIR}/Tools/C_scripts + ${PROJECT_SOURCE_DIR}/Tools/typechecker + DESTINATION + share/amrex + USE_SOURCE_PERMISSIONS + ) + install( + DIRECTORY + ${PROJECT_SOURCE_DIR}/Tools/CMake/ DESTINATION - Tools + ${MODULE_PATH} USE_SOURCE_PERMISSIONS ) endif() @@ -106,11 +112,16 @@ function (install_amrex_targets) # Copy Tools directory to build tree file( COPY - ${PROJECT_SOURCE_DIR}/Tools/CMake - ${PROJECT_SOURCE_DIR}/Tools/C_scripts - ${PROJECT_SOURCE_DIR}/Tools/typechecker + ${PROJECT_SOURCE_DIR}/Tools/C_scripts + ${PROJECT_SOURCE_DIR}/Tools/typechecker + DESTINATION + ${PROJECT_BINARY_DIR}/share/amrex + ) + file( + COPY + ${PROJECT_SOURCE_DIR}/Tools/CMake/ DESTINATION - ${PROJECT_BINARY_DIR}/Tools + ${PROJECT_BINARY_DIR}/${MODULE_PATH} ) diff --git a/Tools/CMake/AMReXTypecheck.cmake b/Tools/CMake/AMReXTypecheck.cmake index 0b68fb8c274..c569eb5d9bd 100644 --- a/Tools/CMake/AMReXTypecheck.cmake +++ b/Tools/CMake/AMReXTypecheck.cmake @@ -296,9 +296,13 @@ function( add_typecheck_target _target) # set(_outfile "${_typecheck_dir}/${_target}_typecheck.ou" ) - # Find typechecker + # Find typechecker find_file(_typechecker "typechecker.py" - HINTS ${AMReX_SOURCE_DIR} ${AMReX_ROOT} ENV AMReX_ROOT PATH_SUFFIXES Tools/typechecker) + HINTS ${AMReX_SOURCE_DIR} ${AMReX_ROOT} ENV AMReX_ROOT + PATH_SUFFIXES + Tools/typechecker # in-source + share/amrex/typechecker # installed + ) add_custom_target( typecheck_${_target} COMMAND python3 ${_typechecker} From 7c5ff6911763109c92f48f2f3e9fea06b2e868b8 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 19 Oct 2023 15:04:52 -0700 Subject: [PATCH 021/186] Print AMReX version at the beginning of Initialize (#3604) Oftentimes, a GPU run dies at the first GPU kernel inside amrex::Initialize before printing a message showing AMReX with a certain version has been initialized. Adding a message at the beginning of amrex::Initialize can provide useful information. 
--- Src/Base/AMReX.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp index f0e56952de2..147f8275c57 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -443,6 +443,10 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, pp.queryAdd("verbose", system::verbose); } + if (system::verbose > 0) { + amrex::Print() << "Initializing AMReX (" << amrex::Version() << ")...\n"; + } + #ifdef AMREX_USE_MPI if (system::verbose > 0) { amrex::Print() << "MPI initialized with " From 54c337f36164227a9b7dd74d5e647cf3627a23d0 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 19 Oct 2023 18:03:56 -0700 Subject: [PATCH 022/186] Windows: Fix Installed AMReXBuildInfo.cmake (#3606) ## Summary Account for different CMake path on Unix vs. Windows. https://cmake.org/cmake/help/latest/command/find_package.html#config-mode-search-procedure ## Additional background Follow-up to #3599 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Tools/CMake/AMReXBuildInfo.cmake | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Tools/CMake/AMReXBuildInfo.cmake b/Tools/CMake/AMReXBuildInfo.cmake index 0d695fad9db..9afbc2ceaf4 100644 --- a/Tools/CMake/AMReXBuildInfo.cmake +++ b/Tools/CMake/AMReXBuildInfo.cmake @@ -40,8 +40,15 @@ include(AMReXTargetHelpers) # if (AMReX_FOUND) # AMReX is pre-installed and used as a library - string(REPLACE "/lib/cmake/AMReX/AMReXCMakeModules" "" AMREX_TOP_DIR_DEFAULT - ${CMAKE_CURRENT_LIST_DIR}) + if (WIN32) # see AMReXInstallHelpers.cmake + string(REPLACE "/cmake/AMReXCMakeModules" "" + AMREX_TOP_DIR_DEFAULT + ${CMAKE_CURRENT_LIST_DIR}) + else () + string(REPLACE "/lib/cmake/AMReX/AMReXCMakeModules" "" + AMREX_TOP_DIR_DEFAULT + ${CMAKE_CURRENT_LIST_DIR}) + endif () else () # this is a superbuild string(REPLACE "/Tools/CMake" "" AMREX_TOP_DIR_DEFAULT From da79aff8053058371a78d4bf85488384242368ee Mon Sep 17 00:00:00 2001 From: Jordan Musser Date: Mon, 23 Oct 2023 11:21:38 -0400 Subject: [PATCH 023/186] Add runtime particle components to HDF5 wrapper (#3596) ## Summary Add runtime components when computing the sizes/offsets for particle HDF5 files. 
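As a hypothetical illustration of why this matters (the function and variable names below are made up, not the actual code in `AMReX_ParticleHDF5.H`), the per-particle record size that feeds the HDF5 sizes/offsets has to count runtime-added components as well as the compile-time ones:

```
#include <cstddef>

// Hypothetical sketch: if the runtime components are left out of this sum,
// the computed sizes/offsets in the HDF5 file are too small.
std::size_t record_bytes (int n_builtin_real, int n_runtime_real,
                          int n_builtin_int,  int n_runtime_int,
                          std::size_t real_size, std::size_t int_size)
{
    return  (n_builtin_real + n_runtime_real) * real_size   // runtime reals now included
          + (n_builtin_int  + n_runtime_int ) * int_size;   // runtime ints  now included
}
```
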
## Additional background ## Checklist The proposed changes: - [ x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Jordan Musser Co-authored-by: Andrew Myers --- Src/Extern/HDF5/AMReX_ParticleHDF5.H | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Src/Extern/HDF5/AMReX_ParticleHDF5.H b/Src/Extern/HDF5/AMReX_ParticleHDF5.H index 414db4ad29b..817a8e0196f 100644 --- a/Src/Extern/HDF5/AMReX_ParticleHDF5.H +++ b/Src/Extern/HDF5/AMReX_ParticleHDF5.H @@ -711,12 +711,13 @@ ParticleContainer_impl Date: Mon, 23 Oct 2023 11:18:33 -0600 Subject: [PATCH 024/186] update doc for amrex::Abort on GPU (#3605) The docs on the behavior of amrex::Abort and related functions appear to be out of date, so this clarifies that when running on GPU these functions do not do anything when called from within GPU kernels unless you compile in debug mode or with `USE_ASSERTION=TRUE`. --- Docs/sphinx_documentation/source/Basics.rst | 3 +++ Docs/sphinx_documentation/source/GPU.rst | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst index 1227b5f7b71..ebddd14f9b5 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -2812,3 +2812,6 @@ Backtrace files are produced by AMReX signal handler by default when segfault occurs or ``Abort`` is called. If the application does not want AMReX to handle this, ``ParmParse`` parameter `amrex.signal_handling=0` can be used to disable it. + +See :ref:`sec:gpu:assertion` for considerations on using these functions in +GPU-enabled code. diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index 56d540e63c4..90dbcc9f26f 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -1553,9 +1553,13 @@ Assertions and Error Checking To help debugging, we often use :cpp:`amrex::Assert` and :cpp:`amrex::Abort`. These functions are GPU safe and can be used in GPU kernels. However, implementing these functions requires additional -GPU registers, which will reduce overall performance. Therefore, it -is preferred to implement such calls in debug mode only by wrapping the -calls using ``#ifdef AMREX_DEBUG``. +GPU registers, which will reduce overall performance. Therefore, by +default these functions and the macro ``AMREX_ALWAYS_ASSERT`` are no-ops +for optimized builds (e.g., ``DEBUG=FALSE`` using the GNU Make build +system) when called from kernels run on GPU. Calls to these functions from +GPU kernels are active for debug builds and can optionally be activated +at compile time for optimized builds (e.g., ``DEBUG=FALSE`` and +``USE_ASSERTION=TRUE`` using the GNU Make build system). In CPU code, :cpp:`AMREX_GPU_ERROR_CHECK()` can be called to check the health of previous GPU launches. This call From be6c6415467d09da6109d27cfa218868abc1f9db Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 26 Oct 2023 09:50:01 -0700 Subject: [PATCH 025/186] `AMReXBuildInfo.cmake`: AMReX_DIR (#3609) ## Summary The `AMReX_DIR` points if set to the CMake module path root. 
The old logic did not work for me in a situation (ImpactX) where: - AMReX is pre-installed but - found through a superbuild of another transient lib (ABLASTR) ## Additional background Follow-up to #3599 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Tools/CMake/AMReXBuildInfo.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/CMake/AMReXBuildInfo.cmake b/Tools/CMake/AMReXBuildInfo.cmake index 9afbc2ceaf4..0b87f053c2e 100644 --- a/Tools/CMake/AMReXBuildInfo.cmake +++ b/Tools/CMake/AMReXBuildInfo.cmake @@ -38,7 +38,7 @@ include(AMReXTargetHelpers) # # Set paths # -if (AMReX_FOUND) +if (AMReX_DIR) # AMReX is pre-installed and used as a library if (WIN32) # see AMReXInstallHelpers.cmake string(REPLACE "/cmake/AMReXCMakeModules" "" @@ -56,7 +56,7 @@ else () endif () set(AMREX_TOP_DIR "${AMREX_TOP_DIR_DEFAULT}" CACHE INTERNAL "Top level AMReX directory") -if (AMReX_FOUND) +if (AMReX_DIR) # AMReX is pre-installed and used as a library set(AMREX_C_SCRIPTS_DIR "${AMREX_TOP_DIR}/share/amrex/C_scripts" CACHE INTERNAL "Path to AMReX' C_scripts dir") From 601cc4ee80e0d7435a5b099202459bbb1f16b7a6 Mon Sep 17 00:00:00 2001 From: Klaus Date: Tue, 31 Oct 2023 14:07:41 -0500 Subject: [PATCH 026/186] Give FlashFluxRegisters ways to accumulate data in registers (#3597) I introduce variant methods that allow adding to previously stored data in Flash(-X) flux registers, rather than just copying over the older data. For Flash-X'ers: Something like this is needed to get flux correction working in the nontelescoping variant of the current Spark Hydro implementation. 
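A small usage sketch of the new methods (the wrapper function `put_fine_flux` and its `first_piece` flag are illustrative; the `store`/`add` signatures are the ones declared in the header below). `store()` overwrites the coarse-side register entry for a fine-grid face, while the new `add()` accumulates into it, e.g. when the fine fluxes for one coarse step are produced in several pieces; the area-weighted and per-component `isFluxDensity` overloads gain matching `add()` variants as well:

```
#include <AMReX_FlashFluxRegister.H>

// Sketch only: choose between overwriting and accumulating.
void put_fine_flux (amrex::FlashFluxRegister& freg, int fine_gid, int dir,
                    amrex::FArrayBox const& fine_flux, amrex::Real sf,
                    bool first_piece)
{
    if (first_piece) {
        freg.store(fine_gid, dir, fine_flux, sf);  // register  = sf * averaged fine flux
    } else {
        freg.add  (fine_gid, dir, fine_flux, sf);  // register += sf * averaged fine flux
    }
}
```
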
## Summary ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Weiqun Zhang --- .../AmrCore/AMReX_FlashFluxRegister.H | 416 ++++++++++++++++++ .../AmrCore/AMReX_FlashFluxRegister.cpp | 280 +----------- .../AmrCore/AMReX_flash_fluxregister_fi.cpp | 35 ++ .../AmrCore/AMReX_flash_fluxregister_mod.F90 | 85 +++- 4 files changed, 544 insertions(+), 272 deletions(-) diff --git a/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.H b/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.H index d946d33fdbc..3756a4bd1f2 100644 --- a/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.H +++ b/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.H @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -42,6 +43,18 @@ public: void store (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& area, const int* isFluxDensity, Real sf); + // flux_in_register += scaling_factor * \sum{fine_flux} / (ref_ratio[0]*ref_ratio[1]*ref_ratio[2]) + void add (int fine_global_index, int dir, FArrayBox const& fine_flux, Real sf); + + // flux_in_register += scaling_factor * \sum{fine_flux * area} + void add (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& area, + Real sf); + + // flux_in_register += scaling_factor * \sum{fine_flux * area}, if the component is flux density + // scaling_factor * \sum{fine_flux} , otherwise + void add (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& area, + const int* isFluxDensity, Real sf); + void communicate (); // crse_flux = flux_in_register * scaling_factor @@ -63,6 +76,20 @@ public: void load (int crse_global_index, int dir, FArrayBox& crse_flux, FArrayBox const& cflux, FArrayBox const& area, const int* isFluxDensity, Real sf_f, Real sf_c) const; + enum struct OpType { Store, Add }; + + template + void store_or_add (int fine_global_index, int dir, FArrayBox const& fine_flux, Real sf); + + template + void store_or_add (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& area, + Real sf); + + template + void store_or_add (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& area, + const int* isFluxDensity, Real sf); + + protected: BoxArray m_fine_grids; @@ -86,6 +113,395 @@ protected: mutable Vector > m_d_ifd; }; +template +void FlashFluxRegister::store_or_add (int fine_global_index, int dir, + FArrayBox const& fine_flux, Real sf) +{ + AMREX_ASSERT(dir < AMREX_SPACEDIM); + auto found = m_fine_map.find(fine_global_index); + if (found != m_fine_map.end()) { + const int ncomp = m_ncomp; + Array const& fab_a = found->second; + if (fab_a[dir]) { + Box const& b = fab_a[dir]->box(); + Array4 const& dest = fab_a[dir]->array(); + Array4 const& src = fine_flux.const_array(); + if (dir == 0) { +#if (AMREX_SPACEDIM == 1) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(j,k,dest); + auto rhs = src(2*i,0,0,n)*sf; + if constexpr (op == OpType::Store) { + dest(i,0,0,n) = rhs; + } else { + dest(i,0,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 2) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(k,dest); + auto rhs = (src(2*i,2*j ,0,n) + + src(2*i,2*j+1,0,n)) * 
(Real(0.5)*sf); + if constexpr (op == OpType::Store) { + dest(i,j,0,n) = rhs; + } else { + dest(i,j,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 3) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + auto rhs = (src(2*i,2*j ,2*k ,n) + + src(2*i,2*j+1,2*k ,n) + + src(2*i,2*j ,2*k+1,n) + + src(2*i,2*j+1,2*k+1,n)) * (Real(0.25)*sf); + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); +#endif + } +#if (AMREX_SPACEDIM >= 2) + else if (dir == 1) { +#if (AMREX_SPACEDIM == 2) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(k,dest); + auto rhs = (src(2*i ,2*j,0,n) + + src(2*i+1,2*j,0,n)) * (Real(0.5)*sf); + if constexpr (op == OpType::Store) { + dest(i,j,0,n) = rhs; + } else { + dest(i,j,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 3) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + auto rhs = (src(2*i ,2*j,2*k ,n) + + src(2*i+1,2*j,2*k ,n) + + src(2*i ,2*j,2*k+1,n) + + src(2*i+1,2*j,2*k+1,n)) * (Real(0.25)*sf); + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); +#endif + } +#if (AMREX_SPACEDIM == 3) + else { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + auto rhs = (src(2*i ,2*j ,2*k,n) + + src(2*i+1,2*j ,2*k,n) + + src(2*i ,2*j+1,2*k,n) + + src(2*i+1,2*j+1,2*k,n)) * (Real(0.25)*sf); + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); + } +#endif +#endif + } + } +} + +template +void FlashFluxRegister::store_or_add (int fine_global_index, int dir, + FArrayBox const& fine_flux, + FArrayBox const& fine_area, Real sf) +{ + AMREX_ASSERT(dir < AMREX_SPACEDIM); + auto found = m_fine_map.find(fine_global_index); + if (found != m_fine_map.end()) { + const int ncomp = m_ncomp; + Array const& fab_a = found->second; + if (fab_a[dir]) { + Box const& b = fab_a[dir]->box(); + Array4 const& dest = fab_a[dir]->array(); + Array4 const& src = fine_flux.const_array(); + Array4 const& area = fine_area.const_array(); + if (dir == 0) { +#if (AMREX_SPACEDIM == 1) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(j,k,dest); + auto rhs = src(2*i,0,0,n)*area(2*i,0,0)*sf; + if constexpr (op == OpType::Store) { + dest(i,0,0,n) = rhs; + } else { + dest(i,0,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 2) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(k,dest); + auto rhs = (src(2*i,2*j ,0,n)*area(2*i,2*j ,0) + + src(2*i,2*j+1,0,n)*area(2*i,2*j+1,0)) * sf; + if constexpr (op == OpType::Store) { + dest(i,j,0,n) = rhs; + } else { + dest(i,j,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 3) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + auto rhs = (src(2*i,2*j ,2*k ,n)*area(2*i,2*j ,2*k ) + + src(2*i,2*j+1,2*k ,n)*area(2*i,2*j+1,2*k ) + + src(2*i,2*j ,2*k+1,n)*area(2*i,2*j ,2*k+1) + + src(2*i,2*j+1,2*k+1,n)*area(2*i,2*j+1,2*k+1)) * sf; + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); +#endif + } +#if (AMREX_SPACEDIM >= 2) + else if (dir == 1) { +#if (AMREX_SPACEDIM == 2) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(k,dest); + auto rhs = (src(2*i ,2*j,0,n)*area(2*i ,2*j,0) + + 
src(2*i+1,2*j,0,n)*area(2*i+1,2*j,0)) * sf; + if constexpr (op == OpType::Store) { + dest(i,j,0,n) = rhs; + } else { + dest(i,j,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 3) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + auto rhs = (src(2*i ,2*j,2*k ,n)*area(2*i ,2*j,2*k ) + + src(2*i+1,2*j,2*k ,n)*area(2*i+1,2*j,2*k ) + + src(2*i ,2*j,2*k+1,n)*area(2*i ,2*j,2*k+1) + + src(2*i+1,2*j,2*k+1,n)*area(2*i+1,2*j,2*k+1)) * sf; + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); +#endif + } +#if (AMREX_SPACEDIM == 3) + else { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + auto rhs = (src(2*i ,2*j ,2*k,n)*area(2*i ,2*j ,2*k) + + src(2*i+1,2*j ,2*k,n)*area(2*i+1,2*j ,2*k) + + src(2*i ,2*j+1,2*k,n)*area(2*i ,2*j+1,2*k) + + src(2*i+1,2*j+1,2*k,n)*area(2*i+1,2*j+1,2*k)) * sf; + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); + } +#endif +#endif + } + } +} + +template +void FlashFluxRegister::store_or_add (int fine_global_index, int dir, + FArrayBox const& fine_flux, + FArrayBox const& fine_area, + const int* isFluxDensity, Real sf) +{ + auto& h_ifd = m_h_ifd[OpenMP::get_thread_num()]; + auto& d_ifd = m_d_ifd[OpenMP::get_thread_num()]; + + AMREX_ASSERT(dir < AMREX_SPACEDIM); + auto found = m_fine_map.find(fine_global_index); + if (found != m_fine_map.end()) { + const int ncomp = m_ncomp; + Array const& fab_a = found->second; + if (fab_a[dir]) { + bool allsame = true; + for (int n = 0; n < m_ncomp; ++n) { + if (h_ifd[n] != isFluxDensity[n]) { + allsame = false; + h_ifd[n] = isFluxDensity[n]; + } + } + if (d_ifd.empty()) { + allsame = false; + d_ifd.resize(m_ncomp); + } + if (! 
allsame) { + Gpu::copyAsync(Gpu::HostToDevice(), h_ifd.begin(), h_ifd.end(), d_ifd.begin()); + } + + Box const& b = fab_a[dir]->box(); + Array4 const& dest = fab_a[dir]->array(); + Array4 const& src = fine_flux.const_array(); + Array4 const& area = fine_area.const_array(); + const int* ifd = d_ifd.data(); + if (dir == 0) { +#if (AMREX_SPACEDIM == 1) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(j,k,dest); + Real rhs; + if (ifd[n]) { + rhs = src(2*i,0,0,n)*area(2*i,0,0)*sf; + } else { + rhs = src(2*i,0,0,n)*sf; + } + if constexpr (op == OpType::Store) { + dest(i,0,0,n) = rhs; + } else { + dest(i,0,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 2) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(k,dest); + Real rhs; + if (ifd[n]) { + rhs = (src(2*i,2*j ,0,n)*area(2*i,2*j ,0) + + src(2*i,2*j+1,0,n)*area(2*i,2*j+1,0)) * sf; + } else { + rhs = (src(2*i,2*j ,0,n) + + src(2*i,2*j+1,0,n)) * sf; + } + if constexpr (op == OpType::Store) { + dest(i,j,0,n) = rhs; + } else { + dest(i,j,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 3) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + Real rhs; + if (ifd[n]) { + rhs = (src(2*i,2*j ,2*k ,n)*area(2*i,2*j ,2*k ) + + src(2*i,2*j+1,2*k ,n)*area(2*i,2*j+1,2*k ) + + src(2*i,2*j ,2*k+1,n)*area(2*i,2*j ,2*k+1) + + src(2*i,2*j+1,2*k+1,n)*area(2*i,2*j+1,2*k+1)) * sf; + } else { + rhs = (src(2*i,2*j ,2*k ,n) + + src(2*i,2*j+1,2*k ,n) + + src(2*i,2*j ,2*k+1,n) + + src(2*i,2*j+1,2*k+1,n)) * sf; + } + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); +#endif + } +#if (AMREX_SPACEDIM >= 2) + else if (dir == 1) { +#if (AMREX_SPACEDIM == 2) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + amrex::ignore_unused(k,dest); + Real rhs; + if (ifd[n]) { + rhs = (src(2*i ,2*j,0,n)*area(2*i ,2*j,0) + + src(2*i+1,2*j,0,n)*area(2*i+1,2*j,0)) * sf; + } else { + rhs = (src(2*i ,2*j,0,n) + + src(2*i+1,2*j,0,n)) * sf; + } + if constexpr (op == OpType::Store) { + dest(i,j,0,n) = rhs; + } else { + dest(i,j,0,n) += rhs; + } + }); +#endif +#if (AMREX_SPACEDIM == 3) + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + Real rhs; + if (ifd[n]) { + rhs = (src(2*i ,2*j,2*k ,n)*area(2*i ,2*j,2*k ) + + src(2*i+1,2*j,2*k ,n)*area(2*i+1,2*j,2*k ) + + src(2*i ,2*j,2*k+1,n)*area(2*i ,2*j,2*k+1) + + src(2*i+1,2*j,2*k+1,n)*area(2*i+1,2*j,2*k+1)) * sf; + } else { + rhs = (src(2*i ,2*j,2*k ,n) + + src(2*i+1,2*j,2*k ,n) + + src(2*i ,2*j,2*k+1,n) + + src(2*i+1,2*j,2*k+1,n)) * sf; + } + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); +#endif + } +#if (AMREX_SPACEDIM == 3) + else { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, + { + Real rhs; + if (ifd[n]) { + rhs = (src(2*i ,2*j ,2*k,n)*area(2*i ,2*j ,2*k) + + src(2*i+1,2*j ,2*k,n)*area(2*i+1,2*j ,2*k) + + src(2*i ,2*j+1,2*k,n)*area(2*i ,2*j+1,2*k) + + src(2*i+1,2*j+1,2*k,n)*area(2*i+1,2*j+1,2*k)) * sf; + } else { + rhs = (src(2*i ,2*j ,2*k,n) + + src(2*i+1,2*j ,2*k,n) + + src(2*i ,2*j+1,2*k,n) + + src(2*i+1,2*j+1,2*k,n)) * sf; + } + amrex::ignore_unused(dest); // for cuda + if constexpr (op == OpType::Store) { + dest(i,j,k,n) = rhs; + } else { + dest(i,j,k,n) += rhs; + } + }); + } +#endif +#endif + } + } +} + } #endif diff --git a/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.cpp 
b/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.cpp index abe74c41125..0e804f76a12 100644 --- a/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.cpp +++ b/Src/F_Interfaces/AmrCore/AMReX_FlashFluxRegister.cpp @@ -1,5 +1,4 @@ #include -#include namespace amrex { @@ -178,279 +177,36 @@ void FlashFluxRegister::define (const BoxArray& fba, const BoxArray& cba, void FlashFluxRegister::store (int fine_global_index, int dir, FArrayBox const& fine_flux, Real sf) { - AMREX_ASSERT(dir < AMREX_SPACEDIM); - auto found = m_fine_map.find(fine_global_index); - if (found != m_fine_map.end()) { - const int ncomp = m_ncomp; - Array const& fab_a = found->second; - if (fab_a[dir]) { - Box const& b = fab_a[dir]->box(); - Array4 const& dest = fab_a[dir]->array(); - Array4 const& src = fine_flux.const_array(); - if (dir == 0) { -#if (AMREX_SPACEDIM == 1) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(j,k); - dest(i,0,0,n) = src(2*i,0,0,n)*sf; - }); -#endif -#if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(k); - dest(i,j,0,n) = (src(2*i,2*j ,0,n) + - src(2*i,2*j+1,0,n)) * (Real(0.5)*sf); - }); -#endif -#if (AMREX_SPACEDIM == 3) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - dest(i,j,k,n) = (src(2*i,2*j ,2*k ,n) + - src(2*i,2*j+1,2*k ,n) + - src(2*i,2*j ,2*k+1,n) + - src(2*i,2*j+1,2*k+1,n)) * (Real(0.25)*sf); - }); -#endif - } -#if (AMREX_SPACEDIM >= 2) - else if (dir == 1) { -#if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(k); - dest(i,j,0,n) = (src(2*i ,2*j,0,n) + - src(2*i+1,2*j,0,n)) * (Real(0.5)*sf); - }); -#endif -#if (AMREX_SPACEDIM == 3) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - dest(i,j,k,n) = (src(2*i ,2*j,2*k ,n) + - src(2*i+1,2*j,2*k ,n) + - src(2*i ,2*j,2*k+1,n) + - src(2*i+1,2*j,2*k+1,n)) * (Real(0.25)*sf); - }); -#endif - } -#if (AMREX_SPACEDIM == 3) - else { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - dest(i,j,k,n) = (src(2*i ,2*j ,2*k,n) + - src(2*i+1,2*j ,2*k,n) + - src(2*i ,2*j+1,2*k,n) + - src(2*i+1,2*j+1,2*k,n)) * (Real(0.25)*sf); - }); - } -#endif -#endif - } - } + store_or_add(fine_global_index, dir, fine_flux, sf); } void FlashFluxRegister::store (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& fine_area, Real sf) { - AMREX_ASSERT(dir < AMREX_SPACEDIM); - auto found = m_fine_map.find(fine_global_index); - if (found != m_fine_map.end()) { - const int ncomp = m_ncomp; - Array const& fab_a = found->second; - if (fab_a[dir]) { - Box const& b = fab_a[dir]->box(); - Array4 const& dest = fab_a[dir]->array(); - Array4 const& src = fine_flux.const_array(); - Array4 const& area = fine_area.const_array(); - if (dir == 0) { -#if (AMREX_SPACEDIM == 1) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(j,k); - dest(i,0,0,n) = src(2*i,0,0,n)*area(2*i,0,0)*sf; - }); -#endif -#if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(k); - dest(i,j,0,n) = (src(2*i,2*j ,0,n)*area(2*i,2*j ,0) + - src(2*i,2*j+1,0,n)*area(2*i,2*j+1,0)) * sf; - }); -#endif -#if (AMREX_SPACEDIM == 3) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - dest(i,j,k,n) = (src(2*i,2*j ,2*k ,n)*area(2*i,2*j ,2*k ) + - src(2*i,2*j+1,2*k ,n)*area(2*i,2*j+1,2*k ) + - src(2*i,2*j ,2*k+1,n)*area(2*i,2*j ,2*k+1) + - src(2*i,2*j+1,2*k+1,n)*area(2*i,2*j+1,2*k+1)) * sf; - }); -#endif - } 
-#if (AMREX_SPACEDIM >= 2) - else if (dir == 1) { -#if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(k); - dest(i,j,0,n) = (src(2*i ,2*j,0,n)*area(2*i ,2*j,0) + - src(2*i+1,2*j,0,n)*area(2*i+1,2*j,0)) * sf; - }); -#endif -#if (AMREX_SPACEDIM == 3) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - dest(i,j,k,n) = (src(2*i ,2*j,2*k ,n)*area(2*i ,2*j,2*k ) + - src(2*i+1,2*j,2*k ,n)*area(2*i+1,2*j,2*k ) + - src(2*i ,2*j,2*k+1,n)*area(2*i ,2*j,2*k+1) + - src(2*i+1,2*j,2*k+1,n)*area(2*i+1,2*j,2*k+1)) * sf; - }); -#endif - } -#if (AMREX_SPACEDIM == 3) - else { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - dest(i,j,k,n) = (src(2*i ,2*j ,2*k,n)*area(2*i ,2*j ,2*k) + - src(2*i+1,2*j ,2*k,n)*area(2*i+1,2*j ,2*k) + - src(2*i ,2*j+1,2*k,n)*area(2*i ,2*j+1,2*k) + - src(2*i+1,2*j+1,2*k,n)*area(2*i+1,2*j+1,2*k)) * sf; - }); - } -#endif -#endif - } - } + store_or_add(fine_global_index, dir, fine_flux, fine_area, sf); } void FlashFluxRegister::store (int fine_global_index, int dir, FArrayBox const& fine_flux, FArrayBox const& fine_area, const int* isFluxDensity, Real sf) { - auto& h_ifd = m_h_ifd[OpenMP::get_thread_num()]; - auto& d_ifd = m_d_ifd[OpenMP::get_thread_num()]; + store_or_add(fine_global_index, dir, fine_flux, fine_area, isFluxDensity, sf); +} - AMREX_ASSERT(dir < AMREX_SPACEDIM); - auto found = m_fine_map.find(fine_global_index); - if (found != m_fine_map.end()) { - const int ncomp = m_ncomp; - Array const& fab_a = found->second; - if (fab_a[dir]) { - bool allsame = true; - for (int n = 0; n < m_ncomp; ++n) { - if (h_ifd[n] != isFluxDensity[n]) { - allsame = false; - h_ifd[n] = isFluxDensity[n]; - } - } - if (d_ifd.empty()) { - allsame = false; - d_ifd.resize(m_ncomp); - } - if (! 
allsame) { - Gpu::copyAsync(Gpu::HostToDevice(), h_ifd.begin(), h_ifd.end(), d_ifd.begin()); - } +void FlashFluxRegister::add (int fine_global_index, int dir, FArrayBox const& fine_flux, Real sf) +{ + store_or_add(fine_global_index, dir, fine_flux, sf); +} - Box const& b = fab_a[dir]->box(); - Array4 const& dest = fab_a[dir]->array(); - Array4 const& src = fine_flux.const_array(); - Array4 const& area = fine_area.const_array(); - const int* ifd = d_ifd.data(); - if (dir == 0) { -#if (AMREX_SPACEDIM == 1) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(j,k); - if (ifd[n]) { - dest(i,0,0,n) = src(2*i,0,0,n)*area(2*i,0,0)*sf; - } else { - dest(i,0,0,n) = src(2*i,0,0,n)*sf; - } - }); -#endif -#if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(k); - if (ifd[n]) { - dest(i,j,0,n) = (src(2*i,2*j ,0,n)*area(2*i,2*j ,0) + - src(2*i,2*j+1,0,n)*area(2*i,2*j+1,0)) * sf; - } else { - dest(i,j,0,n) = (src(2*i,2*j ,0,n) + - src(2*i,2*j+1,0,n)) * sf; - } - }); -#endif -#if (AMREX_SPACEDIM == 3) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - if (ifd[n]) { - dest(i,j,k,n) = (src(2*i,2*j ,2*k ,n)*area(2*i,2*j ,2*k ) + - src(2*i,2*j+1,2*k ,n)*area(2*i,2*j+1,2*k ) + - src(2*i,2*j ,2*k+1,n)*area(2*i,2*j ,2*k+1) + - src(2*i,2*j+1,2*k+1,n)*area(2*i,2*j+1,2*k+1)) * sf; - } else { - dest(i,j,k,n) = (src(2*i,2*j ,2*k ,n) + - src(2*i,2*j+1,2*k ,n) + - src(2*i,2*j ,2*k+1,n) + - src(2*i,2*j+1,2*k+1,n)) * sf; - } - }); -#endif - } -#if (AMREX_SPACEDIM >= 2) - else if (dir == 1) { -#if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - amrex::ignore_unused(k); - if (ifd[n]) { - dest(i,j,0,n) = (src(2*i ,2*j,0,n)*area(2*i ,2*j,0) + - src(2*i+1,2*j,0,n)*area(2*i+1,2*j,0)) * sf; - } else { - dest(i,j,0,n) = (src(2*i ,2*j,0,n) + - src(2*i+1,2*j,0,n)) * sf; - } - }); -#endif -#if (AMREX_SPACEDIM == 3) - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - if (ifd[n]) { - dest(i,j,k,n) = (src(2*i ,2*j,2*k ,n)*area(2*i ,2*j,2*k ) + - src(2*i+1,2*j,2*k ,n)*area(2*i+1,2*j,2*k ) + - src(2*i ,2*j,2*k+1,n)*area(2*i ,2*j,2*k+1) + - src(2*i+1,2*j,2*k+1,n)*area(2*i+1,2*j,2*k+1)) * sf; - } else { - dest(i,j,k,n) = (src(2*i ,2*j,2*k ,n) + - src(2*i+1,2*j,2*k ,n) + - src(2*i ,2*j,2*k+1,n) + - src(2*i+1,2*j,2*k+1,n)) * sf; - } - }); -#endif - } -#if (AMREX_SPACEDIM == 3) - else { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (b, ncomp, i, j, k, n, - { - if (ifd[n]) { - dest(i,j,k,n) = (src(2*i ,2*j ,2*k,n)*area(2*i ,2*j ,2*k) + - src(2*i+1,2*j ,2*k,n)*area(2*i+1,2*j ,2*k) + - src(2*i ,2*j+1,2*k,n)*area(2*i ,2*j+1,2*k) + - src(2*i+1,2*j+1,2*k,n)*area(2*i+1,2*j+1,2*k)) * sf; - } else { - dest(i,j,k,n) = (src(2*i ,2*j ,2*k,n) + - src(2*i+1,2*j ,2*k,n) + - src(2*i ,2*j+1,2*k,n) + - src(2*i+1,2*j+1,2*k,n)) * sf; - } - }); - } -#endif -#endif - } - } +void FlashFluxRegister::add (int fine_global_index, int dir, FArrayBox const& fine_flux, + FArrayBox const& fine_area, Real sf) +{ + store_or_add(fine_global_index, dir, fine_flux, fine_area, sf); +} + +void FlashFluxRegister::add (int fine_global_index, int dir, FArrayBox const& fine_flux, + FArrayBox const& fine_area, const int* isFluxDensity, Real sf) +{ + store_or_add(fine_global_index, dir, fine_flux, fine_area, isFluxDensity, sf); } void FlashFluxRegister::communicate () diff --git a/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_fi.cpp b/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_fi.cpp index e8c2bace6d5..562405a889e 100644 --- 
a/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_fi.cpp +++ b/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_fi.cpp @@ -119,6 +119,41 @@ extern "C" { flux_reg->store(fgid, dir, fab, areafab, ifd, scaling_factor); } + void amrex_fi_flash_fluxregister_add (FlashFluxRegister* flux_reg, int fgid, int dir, + Real const* flux, const int* flo, const int* fhi, int nc, + Real scaling_factor) + { + Box bx; + bx = Box(IntVect(flo), IntVect(fhi)); + bx.shiftHalf(dir,-1); + const FArrayBox fab(bx,nc,const_cast(flux)); + flux_reg->add(fgid, dir, fab, scaling_factor); + } + + void amrex_fi_flash_fluxregister_add_area (FlashFluxRegister* flux_reg, int fgid, int dir, + Real const* flux, const int* flo, const int* fhi, int nc, + Real const* area, Real scaling_factor) + { + Box bx; + bx = Box(IntVect(flo), IntVect(fhi)); + bx.shiftHalf(dir,-1); + const FArrayBox fab(bx,nc,const_cast(flux)); + const FArrayBox areafab(bx,1,const_cast(area)); + flux_reg->add(fgid, dir, fab, areafab, scaling_factor); + } + + void amrex_fi_flash_fluxregister_add_area_ifd (FlashFluxRegister* flux_reg, int fgid, int dir, + Real const* flux, const int* flo, const int* fhi, int nc, + Real const* area, const int* ifd, Real scaling_factor) + { + Box bx; + bx = Box(IntVect(flo), IntVect(fhi)); + bx.shiftHalf(dir,-1); + const FArrayBox fab(bx,nc,const_cast(flux)); + const FArrayBox areafab(bx,1,const_cast(area)); + flux_reg->add(fgid, dir, fab, areafab, ifd, scaling_factor); + } + void amrex_fi_flash_fluxregister_communicate (FlashFluxRegister* flux_reg) { flux_reg->communicate(); diff --git a/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_mod.F90 b/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_mod.F90 index e13cc0e2543..dc0f00fdd0b 100644 --- a/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_mod.F90 +++ b/Src/F_Interfaces/AmrCore/AMReX_flash_fluxregister_mod.F90 @@ -138,6 +138,36 @@ subroutine amrex_fi_flash_fluxregister_store_area_ifd (fr,cgid,dir,flux,flo,fhi, real(amrex_real), value :: scale end subroutine amrex_fi_flash_fluxregister_store_area_ifd + subroutine amrex_fi_flash_fluxregister_add (fr,cgid,dir,flux,flo,fhi,nc,scale) bind(c) + import + implicit none + type(c_ptr), value :: fr + real(amrex_real), intent(in) :: flux(*) + integer(c_int), value, intent(in) :: cgid, dir, nc + integer(c_int), intent(in) :: flo(*), fhi(*) + real(amrex_real), value :: scale + end subroutine amrex_fi_flash_fluxregister_add + + subroutine amrex_fi_flash_fluxregister_add_area (fr,cgid,dir,flux,flo,fhi,nc,area,scale) bind(c) + import + implicit none + type(c_ptr), value :: fr + real(amrex_real), intent(in) :: flux(*), area(*) + integer(c_int), value, intent(in) :: cgid, dir, nc + integer(c_int), intent(in) :: flo(*), fhi(*) + real(amrex_real), value :: scale + end subroutine amrex_fi_flash_fluxregister_add_area + + subroutine amrex_fi_flash_fluxregister_add_area_ifd (fr,cgid,dir,flux,flo,fhi,nc,area,ifd,scale) bind(c) + import + implicit none + type(c_ptr), value :: fr + real(amrex_real), intent(in) :: flux(*), area(*) + integer(c_int), value, intent(in) :: cgid, dir, nc + integer(c_int), intent(in) :: flo(*), fhi(*), ifd(*) + real(amrex_real), value :: scale + end subroutine amrex_fi_flash_fluxregister_add_area_ifd + subroutine amrex_fi_flash_fluxregister_communicate (fr) bind(c) import implicit none @@ -186,50 +216,80 @@ subroutine amrex_flash_fluxregister_communicate (this) call amrex_fi_flash_fluxregister_communicate(this%p) end subroutine amrex_flash_fluxregister_communicate - subroutine amrex_flash_fluxregister_store (this, flux, 
flo, fhi, grid_idx, dir, scale) + subroutine amrex_flash_fluxregister_store (this, flux, flo, fhi, grid_idx, dir, addit, scale) class(amrex_flash_fluxregister), intent(inout) :: this integer, intent(in) :: flo(*), fhi(*), grid_idx, dir real(amrex_real), intent(in) :: flux(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),flo(4):fhi(4)) + logical, optional, intent(in) :: addit real(amrex_real), optional, intent(in) :: scale - ! + logical :: my_addit real(amrex_real) :: my_scale + if (present(addit)) then + my_addit = addit + else + my_addit = .FALSE. + end if if (present(scale)) then my_scale = scale else my_scale = 1._amrex_real end if - call amrex_fi_flash_fluxregister_store(this%p, grid_idx, dir, & - flux, flo, fhi, fhi(4)-flo(4)+1, my_scale) + if (my_addit) then + call amrex_fi_flash_fluxregister_add(this%p, grid_idx, dir, & + flux, flo, fhi, fhi(4)-flo(4)+1, my_scale) + else + call amrex_fi_flash_fluxregister_store(this%p, grid_idx, dir, & + flux, flo, fhi, fhi(4)-flo(4)+1, my_scale) + end if end subroutine amrex_flash_fluxregister_store - subroutine amrex_flash_fluxregister_store_area (this, flux, area, flo, fhi, grid_idx, dir, scale) + subroutine amrex_flash_fluxregister_store_area (this, flux, area, flo, fhi, grid_idx, dir, addit, scale) class(amrex_flash_fluxregister), intent(inout) :: this integer, intent(in) :: flo(*), fhi(*), grid_idx, dir real(amrex_real), intent(in) :: flux(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),flo(4):fhi(4)) real(amrex_real), intent(in) :: area(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3)) + logical, optional, intent(in) :: addit real(amrex_real), optional, intent(in) :: scale ! + logical :: my_addit real(amrex_real) :: my_scale + if (present(addit)) then + my_addit = addit + else + my_addit = .FALSE. + end if if (present(scale)) then my_scale = scale else my_scale = 1._amrex_real end if - call amrex_fi_flash_fluxregister_store_area(this%p, grid_idx, dir, & - flux, flo, fhi, fhi(4)-flo(4)+1, area, my_scale) + if (my_addit) then + call amrex_fi_flash_fluxregister_add_area(this%p, grid_idx, dir, & + flux, flo, fhi, fhi(4)-flo(4)+1, area, my_scale) + else + call amrex_fi_flash_fluxregister_store_area(this%p, grid_idx, dir, & + flux, flo, fhi, fhi(4)-flo(4)+1, area, my_scale) + end if end subroutine amrex_flash_fluxregister_store_area subroutine amrex_flash_fluxregister_store_area_ifd (this, flux, area, flo, fhi, & - isFluxDensity, grid_idx, dir, scale) + isFluxDensity, grid_idx, dir, addit, scale) class(amrex_flash_fluxregister), intent(inout) :: this integer, intent(in) :: flo(*), fhi(*), grid_idx, dir real(amrex_real), intent(in) :: flux(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),flo(4):fhi(4)) real(amrex_real), intent(in) :: area(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3)) logical, intent(in) :: isFluxDensity(flo(4):fhi(4)) + logical, optional, intent(in) :: addit real(amrex_real), optional, intent(in) :: scale ! + logical :: my_addit real(amrex_real) :: my_scale integer(c_int) :: ifd(flo(4):fhi(4)) + if (present(addit)) then + my_addit = addit + else + my_addit = .FALSE. 
+ end if if (present(scale)) then my_scale = scale else @@ -240,8 +300,13 @@ subroutine amrex_flash_fluxregister_store_area_ifd (this, flux, area, flo, fhi, elsewhere ifd = 0 endwhere - call amrex_fi_flash_fluxregister_store_area_ifd(this%p, grid_idx, dir, & - flux, flo, fhi, fhi(4)-flo(4)+1, area, ifd, my_scale) + if (my_addit) then + call amrex_fi_flash_fluxregister_add_area_ifd(this%p, grid_idx, dir, & + flux, flo, fhi, fhi(4)-flo(4)+1, area, ifd, my_scale) + else + call amrex_fi_flash_fluxregister_store_area_ifd(this%p, grid_idx, dir, & + flux, flo, fhi, fhi(4)-flo(4)+1, area, ifd, my_scale) + end if end subroutine amrex_flash_fluxregister_store_area_ifd subroutine amrex_flash_fluxregister_load_1 (this, flux, flo, fhi, grid_idx, dir, scale) From ae7b64bcf6a4ed36dd03e17357cae83b0a394912 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 1 Nov 2023 08:55:58 -0700 Subject: [PATCH 027/186] Update CHANGES for 23.11 (#3613) --- CHANGES | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/CHANGES b/CHANGES index ae56cdd5ffe..a7a22fd97bd 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,52 @@ +# 23.11 + + -- Give FlashFluxRegisters ways to accumulate data in registers (#3597) + + -- `AMReXBuildInfo.cmake`: AMReX_DIR (#3609) + + -- update doc for amrex::Abort on GPU (#3605) + + -- Add runtime particle components to HDF5 wrapper (#3596) + + -- Windows: Fix Installed AMReXBuildInfo.cmake (#3606) + + -- Print AMReX version at the beginning of Initialize (#3604) + + -- Install Move Tools to `shared/amrex` (#3599) + + -- Revert "Add ability for GCC 8 in CMake to build fgradient which uses std::filesystem" (#3601) + + -- Avoid std::filesystem (#3602) + + -- Fix Assertion in MLEBNodeFDLaplacian (#3594) + + -- Fix a memory "leak" in VisMF's persistent streams (#3592) + + -- RealVect Static: Export (#3589) + + -- change MaxCnt from 4 to max(4,max_level+1) for how many iterations we… (#3588) + … allow in creation of the initial grid hierarchy + + -- Add Bittree CI (#3577) + + -- BCType::ext_dir_cc (#3581) + + -- Disable CCache in Windows CIs (#3566) + + -- Fix ICC CI by Freeing up Disk Space (#3583) + + -- Docs: Link pyAMReX (#3582) + + -- NodeABecLaplacian: Reuse (#3579) + + -- simplify how 2d surface integrals are computed (#3571) + + -- Adding bittree interface to improve regridding performance in octree mode (#3555) + + -- MLNodeABecLaplacian (#3559) + + -- Fix Boundary Centroid in a Corner Case in 2D (#3568) + # 23.10 -- Bugfix typo in AMReX_SundialsIntegrator.H Nvar vs NVar, the From 1269a5bf41a8c4f0a832581532fda6e723d9aadc Mon Sep 17 00:00:00 2001 From: Rakesh Roy <137397847+rakesroy@users.noreply.github.com> Date: Thu, 2 Nov 2023 00:17:30 +0530 Subject: [PATCH 028/186] use hipPointerAttribute_t.type as HIP is removing hipPointerAttribute_t.memoryType (#3610) ## Summary This replaces hipPointerAttribute_t.memoryType with hipPointerAttribute_t.type. ## Additional background In ROCm6.0 hipPointerAttribute_t.memoryType will be removed from HIP. Instead hipPointerAttribute_t.type to be used. This is causing build failure in https://github.com/Exawind/amr-wind.git. hipPointerAttribute_t.type has been existing since ROCm5.5, so this change will be backward compatible till ROCm5.5. 
## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate Co-authored-by: Rakesh Roy --- Src/Base/AMReX_GpuUtility.H | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Src/Base/AMReX_GpuUtility.H b/Src/Base/AMReX_GpuUtility.H index a1fa3cdd9dc..ce98556fc5c 100644 --- a/Src/Base/AMReX_GpuUtility.H +++ b/Src/Base/AMReX_GpuUtility.H @@ -63,7 +63,11 @@ namespace Gpu { #if defined(AMREX_USE_HIP) hipPointerAttribute_t attrib; hipError_t r = hipPointerGetAttributes(&attrib, p); +#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6) return r == hipSuccess && attrib.memoryType == hipMemoryTypeDevice; +#else + return r == hipSuccess && attrib.type == hipMemoryTypeDevice; +#endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6) #elif defined(AMREX_USE_CUDA) CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE; CUmemorytype mem_type = static_cast(0); @@ -83,7 +87,11 @@ namespace Gpu { #if defined(AMREX_USE_HIP) hipPointerAttribute_t attrib; hipError_t r = hipPointerGetAttributes(&attrib, p); +#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6) return r == hipSuccess && attrib.memoryType == hipMemoryTypeHost; +#else + return r == hipSuccess && attrib.type == hipMemoryTypeHost; +#endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6) #elif defined(AMREX_USE_CUDA) CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE; CUmemorytype mem_type = static_cast(0); @@ -106,9 +114,15 @@ namespace Gpu { } else { hipPointerAttribute_t attrib; hipError_t r = hipPointerGetAttributes(&attrib, p); +#if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6) return r == hipSuccess && (attrib.memoryType == hipMemoryTypeHost || attrib.memoryType == hipMemoryTypeDevice); +#else + return r == hipSuccess && + (attrib.type == hipMemoryTypeHost || + attrib.type == hipMemoryTypeDevice); +#endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6) } #elif defined(AMREX_USE_CUDA) CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE; From f5400421a4b1ba84be3dda98040a49ff2ef89149 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 1 Nov 2023 12:02:02 -0700 Subject: [PATCH 029/186] FillRandom: Use MKL host API (#3536) --- Src/Base/AMReX_Random.cpp | 40 ++++++++++++----------------------- Tools/CMake/AMReXSYCL.cmake | 2 +- Tools/GNUMake/comps/dpcpp.mak | 2 +- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/Src/Base/AMReX_Random.cpp b/Src/Base/AMReX_Random.cpp index 9e1059e6798..cc791a11fef 100644 --- a/Src/Base/AMReX_Random.cpp +++ b/Src/Base/AMReX_Random.cpp @@ -19,7 +19,7 @@ namespace namespace amrex { #ifdef AMREX_USE_SYCL sycl_rng_descr* rand_engine_descr = nullptr; -//xxxxx oneapi::mkl::rng::philox4x32x10* gpu_rand_generator = nullptr; + oneapi::mkl::rng::philox4x32x10* gpu_rand_generator = nullptr; #else amrex::randState_t* gpu_rand_state = nullptr; amrex::randGenerator_t gpu_rand_generator = nullptr; @@ -44,8 +44,8 @@ void ResizeRandomSeed (amrex::ULong gpu_seed) rand_engine_descr = new sycl_rng_descr (Gpu::Device::streamQueue(), sycl::range<1>(N), gpu_seed, 1); -//xxxxx gpu_rand_generator = new std::remove_pointer_t -// (Gpu::Device::streamQueue(), gpu_seed+1234ULL); + gpu_rand_generator = new std::remove_pointer_t + (Gpu::Device::streamQueue(), gpu_seed+1234ULL); #elif 
defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) @@ -212,11 +212,11 @@ DeallocateRandomSeedDevArray () Gpu::streamSynchronize(); rand_engine_descr = nullptr; } -//xxxxx if (gpu_rand_generator != nullptr) { -// delete gpu_rand_generator; -// Gpu::streamSynchronize(); -// gpu_rand_generator = nullptr; -// } + if (gpu_rand_generator != nullptr) { + delete gpu_rand_generator; + Gpu::streamSynchronize(); + gpu_rand_generator = nullptr; + } #else if (gpu_rand_state != nullptr) { @@ -258,15 +258,9 @@ void FillRandom (Real* p, Long N) #elif defined(AMREX_USE_SYCL) -//xxxxx oneapi::mkl::rng::uniform distr; -// auto event = oneapi::mkl::rng::generate(distr, gpu_rand_generator, N, p); -// event.wait(); - - amrex::ParallelForRNG(N, [=] AMREX_GPU_DEVICE (Long i, RandomEngine const& eng) - { - p[i] = Random(eng); - }); - Gpu::streamSynchronize(); + oneapi::mkl::rng::uniform distr; + auto event = oneapi::mkl::rng::generate(distr, *gpu_rand_generator, N, p); + event.wait(); #else std::uniform_real_distribution distribution(Real(0.0), Real(1.0)); @@ -299,15 +293,9 @@ void FillRandomNormal (Real* p, Long N, Real mean, Real stddev) #elif defined(AMREX_USE_SYCL) -//xxxxx oneapi::mkl::rng::gaussian distr(mean, stddev); -// auto event = oneapi::mkl::rng::generate(distr, gpu_rand_generator, N, p); -// event.wait(); - - amrex::ParallelForRNG(N, [=] AMREX_GPU_DEVICE (Long i, RandomEngine const& eng) - { - p[i] = RandomNormal(mean, stddev, eng); - }); - Gpu::streamSynchronize(); + oneapi::mkl::rng::gaussian distr(mean, stddev); + auto event = oneapi::mkl::rng::generate(distr, *gpu_rand_generator, N, p); + event.wait(); #else diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index 42eb5c4802b..a67571dc412 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -53,7 +53,7 @@ endif() # target_link_options( SYCL INTERFACE - $<${_cxx_sycl}:-fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64> ) + $<${_cxx_sycl}:-qmkl=sequential -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64> ) # TODO: use $ genex for CMake >=3.17 diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index 6e490d9c063..3bcf5cb4372 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -123,7 +123,7 @@ ifneq ($(BL_NO_FORT),TRUE) endif endif -LDFLAGS += -fsycl-device-lib=libc,libm-fp32,libm-fp64 +LDFLAGS += -qmkl=sequential -fsycl-device-lib=libc,libm-fp32,libm-fp64 ifdef SYCL_PARALLEL_LINK_JOBS LDFLAGS += -fsycl-max-parallel-link-jobs=$(SYCL_PARALLEL_LINK_JOBS) From 606a94c69d640a03d5fbf35d11f1c46bf9676e47 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 1 Nov 2023 15:11:03 -0700 Subject: [PATCH 030/186] [Breaking] Prefix `amrex_` to each plotfile Tool (#3600) ## Summary Packaging AMReX for Conda, we received feedback that our plotfile tool names are too generic and can collide with other packages. Thus, we propose to rename them with a common prefix. - [x] decide prefix or suffix (suffix: `` completion will work; prefix: easier for new users) - [x] decide name: `plt`/`amrex`/`amr`/... 
- Weiqun and Axel prefer `amrex_` so far - [ ] also change for GNUmake - [ ] update [test harness](https://github.com/AMReX-Codes/regression_testing) - [ ] update user-facing documentation for tools - [x] ~update [tutorials](https://github.com/AMReX-Codes/amrex-tutorials)?~ ## Additional background https://github.com/conda-forge/staged-recipes/pull/24294 ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Weiqun Zhang --- .github/workflows/clang.yml | 2 +- .github/workflows/gcc.yml | 2 +- Tools/Plotfile/CMakeLists.txt | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index eea7e576af6..ec469bb5de6 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -64,7 +64,7 @@ jobs: CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" export PATH=/tmp/my-amrex/bin:$PATH - which fcompare + which amrex_fcompare ctest --output-on-failure diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 6915a246018..afc2044bdd7 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -60,7 +60,7 @@ jobs: CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" export PATH=/tmp/my-amrex/bin:$PATH - which fcompare + which amrex_fcompare ctest --output-on-failure diff --git a/Tools/Plotfile/CMakeLists.txt b/Tools/Plotfile/CMakeLists.txt index 0efc55c9a16..f462445c952 100644 --- a/Tools/Plotfile/CMakeLists.txt +++ b/Tools/Plotfile/CMakeLists.txt @@ -27,6 +27,11 @@ foreach( _exe IN LISTS _exe_names) if (AMReX_CUDA) set_source_files_properties(${_exe}.cpp PROPERTIES LANGUAGE CUDA) endif() + + # Add prefix to each tool's name to make them unique when installed. + # This avoids potential collisions of names on user systems, e.g., in + # software packages (Spack/Conda/Debian/...). + set_target_properties(${_exe} PROPERTIES OUTPUT_NAME "amrex_${_exe}") endforeach() From a7afcba3cffd86acc748edf39a8a2f33c973bf5d Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 2 Nov 2023 08:45:19 -0700 Subject: [PATCH 031/186] `amrex.omp_threads`: Can Avoid SMT (#3607) ## Summary In all our applications in BLAST, the OpenMP default to use all [logical cores on modern CPUs](https://en.wikipedia.org/wiki/Simultaneous_multithreading) results in significantly slower performance than just using the physical cores with AMReX. Thus, we introduce a new option `amrex.omp_threads` that enables control over the OpenMP threads at startup and has - for most popular systems - an implementation to find out the actual number of physical threads and default to it. For codes, users that change the default to `amrex.omp_threads = nosmt`, the `OMP_NUM_THREADS` variable will still take precedence. This is a bit unusual (because CLI options usually have higher precedence than env vars - and they do if the user provides a number here), but done intentionally: this way, codes like WarpX can set the `nosmt` default and HPC job scripts will set the exact, preferably benchmarked number of threads as usual without surprises. 
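As a concrete illustration of these precedence rules (the executable and inputs-file names below are placeholders, not part of this patch):

```
# Placeholder executable/inputs names, for illustration only.

# Use only the physical cores (skip SMT/HT threads):
./main3d.gnu.MPI.OMP.ex inputs amrex.omp_threads=nosmt

# An exported OMP_NUM_THREADS still takes precedence over "system" and "nosmt":
OMP_NUM_THREADS=16 ./main3d.gnu.MPI.OMP.ex inputs amrex.omp_threads=nosmt   # runs 16 threads

# An explicit integer value overrides OMP_NUM_THREADS:
OMP_NUM_THREADS=16 ./main3d.gnu.MPI.OMP.ex inputs amrex.omp_threads=8       # runs 8 threads
```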
- [x] document ## Tests Performed for AMReX OMP Backend Tests were performed with very small examples, WarpX 3D LWFA test as checked in or AMReX AMRCore 3d test. - [x] Ubuntu 22.04 Laptop w/ 12th Gen Intel i9-12900H: @ax3l - 20 logical cores; the first 12 logical cores use 2x SMT/HT - 20 virtual (default) -> 14 physical (`amrex.omp_threads = nosmt`) - faster runtime! - [x] Perlmutter (SUSE Linux Enterprise 15.4, kernel 5.14.21) - [CPU node](https://docs.nersc.gov/systems/perlmutter/architecture/) with 2x [AMD EPYC 7763](https://www.amd.com/en/products/cpu/amd-epyc-7763) - 2x SMT - 256 default, 128 with `amrex.omp_threads = nosmt` - faster runtime! - [x] Frontier (SUSE Linux Enterprise 15.4, kernel 5.14.21) - 1x AMD EPYC 7763 64-Core Processor (w/ 2x SMT enabled) - 2x SMT - 128 default - 64 with `amrex.omp_threads = nosmt` - faster runtime! - The ideal result might also be lower, due to first cores used by OS and [low-noise cores](https://docs.olcf.ornl.gov/systems/frontier_user_guide.html#low-noise-mode-layout) after that. But that is an orthogonal question and should be set in job scripts: `#SBATCH --ntasks-per-node=8` `#SBATCH --cpus-per-task=7` `#SBATCH --gpus-per-task=1` - [x] Summit (RHEL 8.2, kernel 4.18.0) - 2x IBM Power9 (each 22 physical cores each, each 6 disabled/hidden for OS?, 4x SMT enabled; cpuinfo says 128 total) - 4x SMT - 128 default, 32 with `amrex.omp_threads = nosmt` - faster runtime! - [x] [Lassen](https://hpc.llnl.gov/hardware/compute-platforms/lassen) (RHEL 7.9, kernel 4.14.0) - 2x IBM Power9 (each 22 physical cores, each 2 reserved for OS?, 4x SMT enabled) - 4x SMT - 160 default, 44 with `amrex.omp_threads = nosmt` - faster runtime! - The ideal result might be even down to 40, but that is an orthogonal question and should be set in job scripts. - [x] macOS M1 (arm64/aarch64) mini: - no SMT/HT - 8 default, 8 with `amrex.omp_threads = nosmt` - [x] macOS (OSX Ventura 13.5.2, 2.8 GHz Quad-Core Intel Core i7-8569U) Intel x86_64 @n01r - 2x SMT - 8 default, 4 with `amrex.omp_threads = nosmt` - faster runtime! - [x] macOS (OSX Ventura 13.5.2) M1 Max on mac studio @RTSandberg - no SMT/HT - 10 default, 10 with `amrex.omp_threads = nosmt` - [ ] some BSD/FreeBSD system? - no user requests - low priority, we just keep the default for now - [ ] Windows... looking for a system ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Weiqun Zhang --- .../source/InputsComputeBackends.rst | 21 +++ .../source/Inputs_Chapter.rst | 1 + Src/Base/AMReX.cpp | 15 +- Src/Base/AMReX_OpenMP.H | 15 +- Src/Base/AMReX_OpenMP.cpp | 177 ++++++++++++++++++ Src/Base/CMakeLists.txt | 1 + Src/Base/Make.package | 1 + 7 files changed, 224 insertions(+), 7 deletions(-) create mode 100644 Docs/sphinx_documentation/source/InputsComputeBackends.rst create mode 100644 Src/Base/AMReX_OpenMP.cpp diff --git a/Docs/sphinx_documentation/source/InputsComputeBackends.rst b/Docs/sphinx_documentation/source/InputsComputeBackends.rst new file mode 100644 index 00000000000..26e5d527508 --- /dev/null +++ b/Docs/sphinx_documentation/source/InputsComputeBackends.rst @@ -0,0 +1,21 @@ +.. 
_Chap:InputsComputeBackends: + +Compute Backends +================ + +The following inputs must be preceded by ``amrex.`` and determine runtime options of CPU or GPU compute implementations. + ++------------------------+-----------------------------------------------------------------------+-------------+------------+ +| Parameter | Description | Type | Default | ++========================+=======================================================================+=============+============+ +| ``omp_threads`` | If OpenMP is enabled, this can be used to set the default number of | String | ``system`` | +| | threads. The special value ``nosmt`` can be used to avoid using | or Int | | +| | threads for virtual cores (aka Hyperthreading or SMT), as is default | | | +| | in OpenMP, and instead only spawns threads equal to the number of | | | +| | physical cores in the system. | | | +| | For the values ``system`` and ``nosmt``, the environment variable | | | +| | ``OMP_NUM_THREADS`` takes precedence. For Integer values, | | | +| | ``OMP_NUM_THREADS`` is ignored. | | | ++------------------------+-----------------------------------------------------------------------+-------------+------------+ + +For GPU-specific parameters, see also the :ref:`GPU chapter `. diff --git a/Docs/sphinx_documentation/source/Inputs_Chapter.rst b/Docs/sphinx_documentation/source/Inputs_Chapter.rst index 0a64aeb492c..43ead40b3c6 100644 --- a/Docs/sphinx_documentation/source/Inputs_Chapter.rst +++ b/Docs/sphinx_documentation/source/Inputs_Chapter.rst @@ -9,6 +9,7 @@ Run-time Inputs InputsProblemDefinition InputsTimeStepping InputsLoadBalancing + InputsComputeBackends InputsPlotFiles InputsCheckpoint diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp index 147f8275c57..4449dab1955 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -52,6 +52,7 @@ #endif #ifdef AMREX_USE_OMP +#include #include #endif @@ -72,7 +73,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -459,15 +462,17 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, #endif #ifdef AMREX_USE_OMP + amrex::OpenMP::init_threads(); + + // status output if (system::verbose > 0) { // static_assert(_OPENMP >= 201107, "OpenMP >= 3.1 is required."); amrex::Print() << "OMP initialized with " << omp_get_max_threads() << " OMP threads\n"; } -#endif -#if defined(AMREX_USE_MPI) && defined(AMREX_USE_OMP) + // warn if over-subscription is detected if (system::verbose > 0) { auto ncores = int(std::thread::hardware_concurrency()); if (ncores != 0 && // It might be zero according to the C++ standard. 
@@ -476,8 +481,10 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, amrex::Print(amrex::ErrorStream()) << "AMReX Warning: You might be oversubscribing CPU cores with OMP threads.\n" << " There are " << ncores << " cores per node.\n" - << " There are " << ParallelDescriptor::NProcsPerNode() << " MPI ranks per node.\n" - << " But OMP is initialized with " << omp_get_max_threads() << " threads per rank.\n" +#if defined(AMREX_USE_MPI) + << " There are " << ParallelDescriptor::NProcsPerNode() << " MPI ranks (processes) per node.\n" +#endif + << " But OMP is initialized with " << omp_get_max_threads() << " threads per process.\n" << " You should consider setting OMP_NUM_THREADS=" << ncores/ParallelDescriptor::NProcsPerNode() << " or less in the environment.\n"; } diff --git a/Src/Base/AMReX_OpenMP.H b/Src/Base/AMReX_OpenMP.H index 8eb8ada4513..ce267b9be73 100644 --- a/Src/Base/AMReX_OpenMP.H +++ b/Src/Base/AMReX_OpenMP.H @@ -11,10 +11,12 @@ namespace amrex::OpenMP { inline int get_max_threads () { return omp_get_max_threads(); } inline int get_thread_num () { return omp_get_thread_num(); } inline int in_parallel () { return omp_in_parallel(); } + inline void set_num_threads (int num) { omp_set_num_threads(num); } + void init_threads (); } -#else +#else // AMREX_USE_OMP namespace amrex::OpenMP { @@ -22,9 +24,16 @@ namespace amrex::OpenMP { constexpr int get_max_threads () { return 1; } constexpr int get_thread_num () { return 0; } constexpr int in_parallel () { return false; } - + constexpr void set_num_threads (int) { /* nothing */ } + constexpr void init_threads () { /* nothing */ } } -#endif +#endif // AMREX_USE_OMP + +namespace amrex { + /** ... */ + int + numUniquePhysicalCores(); +} #endif diff --git a/Src/Base/AMReX_OpenMP.cpp b/Src/Base/AMReX_OpenMP.cpp new file mode 100644 index 00000000000..5ddd9944411 --- /dev/null +++ b/Src/Base/AMReX_OpenMP.cpp @@ -0,0 +1,177 @@ +#include +#include +#include +#include + +#if defined(__APPLE__) +#include +#include +#endif + +#if defined(_WIN32) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace amrex +{ + int + numUniquePhysicalCores () + { + int ncores; + +#if defined(__APPLE__) + size_t len = sizeof(ncores); + // See hw.physicalcpu and hw.physicalcpu_max + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_system_capabilities/ + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname + if (sysctlbyname("hw.physicalcpu", &ncores, &len, NULL, 0) == -1) { + if (system::verbose > 0) { + amrex::Print() << "numUniquePhysicalCores(): Error receiving hw.physicalcpu! 
" + << "Defaulting to visible cores.\n"; + } + ncores = int(std::thread::hardware_concurrency()); + } +#elif defined(__linux__) + std::set> uniqueThreadSets; + int cpuIndex = 0; + + while (true) { + // for each logical CPU in cpuIndex from 0...N-1 + std::string path = "/sys/devices/system/cpu/cpu" + std::to_string(cpuIndex) + "/topology/thread_siblings_list"; + std::ifstream file(path); + if (!file.is_open()) { + break; // no further CPUs to check + } + + // find its siblings + std::vector siblings; + std::string line; + if (std::getline(file, line)) { + std::stringstream ss(line); + std::string token; + + // Possible syntax: 0-3, 8-11, 14,17 + // https://github.com/torvalds/linux/blob/v6.5/Documentation/ABI/stable/sysfs-devices-system-cpu#L68-L72 + while (std::getline(ss, token, ',')) { + size_t dashPos = token.find('-'); + if (dashPos != std::string::npos) { + // Range detected + int start = std::stoi(token.substr(0, dashPos)); + int end = std::stoi(token.substr(dashPos + 1)); + for (int i = start; i <= end; ++i) { + siblings.push_back(i); + } + } else { + siblings.push_back(std::stoi(token)); + } + } + } + + // and record the siblings group + // (assumes: ascending and unique sets per cpuIndex) + uniqueThreadSets.insert(siblings); + cpuIndex++; + } + + if (cpuIndex == 0) { + if (system::verbose > 0) { + amrex::Print() << "numUniquePhysicalCores(): Error reading CPU info.\n"; + } + ncores = int(std::thread::hardware_concurrency()); + } else { + ncores = int(uniqueThreadSets.size()); + } +#elif defined(_WIN32) + DWORD length = 0; + bool result = GetLogicalProcessorInformation(NULL, &length); + + if (!result) { + if (system::verbose > 0) { + amrex::Print() << "numUniquePhysicalCores(): Failed to get logical processor information! " + << "Defaulting to visible cores.\n"; + } + ncores = int(std::thread::hardware_concurrency()); + } + else { + std::vector buffer(length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)); + if (!GetLogicalProcessorInformation(&buffer[0], &length)) { + if (system::verbose > 0) { + amrex::Print() << "numUniquePhysicalCores(): Failed to get logical processor information! " + << "Defaulting to visible cores.\n"; + } + ncores = int(std::thread::hardware_concurrency()); + } else { + ncores = 0; + for (const auto& info : buffer) { + if (info.Relationship == RelationProcessorCore) { + ncores++; + } + } + } + } +#else + // TODO: + // BSD + if (system::verbose > 0) { + amrex::Print() << "numUniquePhysicalCores(): Unknown system. Defaulting to visible cores.\n"; + } + ncores = int(std::thread::hardware_concurrency()); +#endif + return ncores; + } +} // namespace amrex + +#ifdef AMREX_USE_OMP +namespace amrex::OpenMP +{ + void init_threads () + { + amrex::ParmParse pp("amrex"); + std::string omp_threads = "system"; + pp.queryAdd("omp_threads", omp_threads); + + auto to_int = [](std::string const & str_omp_threads) { + std::optional num; + try { num = std::stoi(str_omp_threads); } + catch (...) { /* nothing */ } + return num; + }; + + if (omp_threads == "system") { + // default or OMP_NUM_THREADS environment variable + } else if (omp_threads == "nosmt") { + char const *env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); + if (env_omp_num_threads != nullptr && amrex::system::verbose > 1) { + amrex::Print() << "amrex.omp_threads was set to nosmt," + << "but OMP_NUM_THREADS was set. 
Will keep " + << "OMP_NUM_THREADS=" << env_omp_num_threads << ".\n"; + } else { + omp_set_num_threads(numUniquePhysicalCores()); + } + } else { + std::optional num_omp_threads = to_int(omp_threads); + if (num_omp_threads.has_value()) { + omp_set_num_threads(num_omp_threads.value()); + } + else { + if (amrex::system::verbose > 0) { + amrex::Print() << "amrex.omp_threads has an unknown value: " + << omp_threads + << " (try system, nosmt, or a positive integer)\n"; + } + } + } + } +} // namespace amrex::OpenMP +#endif // AMREX_USE_OMP diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 544de3aed8c..459ec3bd7c4 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -53,6 +53,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_ParallelDescriptor.H AMReX_ParallelDescriptor.cpp AMReX_OpenMP.H + AMReX_OpenMP.cpp AMReX_ParallelReduce.H AMReX_ForkJoin.H AMReX_ForkJoin.cpp diff --git a/Src/Base/Make.package b/Src/Base/Make.package index 29b4c25dc84..276887ebd79 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -38,6 +38,7 @@ C$(AMREX_BASE)_headers += AMReX_REAL.H AMReX_INT.H AMReX_CONSTANTS.H AMReX_SPACE C$(AMREX_BASE)_sources += AMReX_DistributionMapping.cpp AMReX_ParallelDescriptor.cpp C$(AMREX_BASE)_headers += AMReX_DistributionMapping.H AMReX_ParallelDescriptor.H C$(AMREX_BASE)_headers += AMReX_OpenMP.H +C$(AMREX_BASE)_sources += AMReX_OpenMP.cpp C$(AMREX_BASE)_headers += AMReX_ParallelReduce.H From d36463103daed09a40cdea235041a6ab79ff280c Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Tue, 7 Nov 2023 00:19:10 -0500 Subject: [PATCH 032/186] replace AMREX_DEVICE_COMPILE with AMREX_IF_ON_DEVICE and AMREX_IF_ON_HOST (#3591) ## Summary This adds the macros `AMREX_IF_ON_DEVICE((code_for_device))` and `AMREX_IF_ON_HOST((code_for_host))` that are compatible with single-pass host/device compilation (as used by `nvc++ -cuda`), as well as backward compatible with all other compilers. This also replaces all uses of `AMREX_DEVICE_COMPILE` with these macros. Fixes https://github.com/AMReX-Codes/amrex/issues/3586. ## Additional background Single-pass compilation evalutes the preprocessor macros once for each source file. This means that preprocessor conditionals cannot be used to choose between host and device code. In particular, NVHPC with `-cuda` does not support `__CUDA_ARCH__`, instead requiring the use of the `if target` construct. This creates portable macros that work for either single-pass or two-pass compilation, but requires restructuring of any code that uses AMREX_DEVICE_COMPILE so that the code appears as a macro argument. This PR will allow using NVHPC with `-cuda` as the unified host/device compiler for AMReX. In the future, single-pass compilers for other backends may be available, e.g., SYCL (https://dl.acm.org/doi/abs/10.1145/3585341.3585351). AMReX can be configured to build with `nvc++ -cuda` using CMake: ``` cmake .. 
-DAMReX_GPU_BACKEND=CUDA -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CUDA_COMPILER=nvc++ -DCMAKE_CUDA_COMPILER_ID=NVCXX -DCMAKE_CUDA_ARCHITECTURES=80 -DCMAKE_CUDA_COMPILER_FORCED=ON -DCMAKE_CUDA_COMPILE_FEATURES=cuda_std_17 -DAMReX_GPU_RDC=OFF -DCMAKE_CXX_FLAGS="-cuda --gcc-toolchain=$(which gcc)" -DCMAKE_CUDA_FLAGS="-cuda --gcc-toolchain=$(which gcc)" -DAMReX_ENABLE_TESTS=ON -DCMAKE_CUDA_HOST_LINK_LAUNCHER=nvc++ -DCMAKE_CUDA_LINK_EXECUTABLE=" -o " ``` CMake hacks (https://github.com/NVIDIA/cub/blob/0fc3c3701632a4be906765b73be20a9ad0da603d/cmake/CubCompilerHacks.cmake) are tested with CMake 3.22.1 and NVHPC 23.5, 23.7, and 23.9 (earlier versions do not work). However, it currently fails to link the executables for the tests due to a [compiler/linker bug](https://forums.developer.nvidia.com/t/nvc-cuda-fails-to-link-code-when-using-device-curand-functions/270401/5). (Note that by default, `nvcc` preserves denormals, whereas `nvc++` does not. Also, `nvc++` generates relocatable device code by default, whereas `nvcc` does not.) ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Weiqun Zhang --- Docs/sphinx_documentation/source/GPU.rst | 9 +- Src/Base/AMReX.H | 60 +++---- Src/Base/AMReX_Algorithm.H | 178 +++++++++++--------- Src/Base/AMReX_Array4.H | 46 ++--- Src/Base/AMReX_FabArray.H | 7 +- Src/Base/AMReX_GpuAtomic.H | 204 ++++++++++++----------- Src/Base/AMReX_GpuLaunch.H | 29 ++-- Src/Base/AMReX_GpuQualifiers.H | 29 ++++ Src/Base/AMReX_GpuRange.H | 52 +++--- Src/Base/AMReX_GpuUtility.H | 10 +- Src/Base/AMReX_Math.H | 76 +++++---- Src/Base/AMReX_Random.H | 109 ++++++------ Src/Base/AMReX_TableData.H | 100 +++++------ Src/Base/Parser/AMReX_IParser.H | 28 +--- Src/Base/Parser/AMReX_Parser.H | 35 ++-- Src/Base/Parser/AMReX_Parser_Y.H | 4 +- Src/EB/AMReX_EB2_GeometryShop.H | 13 +- 17 files changed, 518 insertions(+), 471 deletions(-) diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index 90dbcc9f26f..aff060e9166 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -489,11 +489,10 @@ GPU support. When AMReX is compiled with ``USE_OMP_OFFLOAD=TRUE``, ``AMREX_USE_OMP_OFFLOAD`` is defined. -In addition to AMReX's preprocessor macros, CUDA provides the -``__CUDA_ARCH__`` macro which is only defined when in device code. -HIP and Sycl provide similar macros. -``AMREX_DEVICE_COMPILE`` should be used when a ``__host__ __device__`` -function requires separate code for the CPU and GPU implementations. +The macros ``AMREX_IF_ON_DEVICE((code_for_device))`` and +``AMREX_IF_ON_HOST((code_for_host))`` should be used when a +``__host__ __device__`` function requires separate code for the +CPU and GPU implementations. .. 
=================================================================== diff --git a/Src/Base/AMReX.H b/Src/Base/AMReX.H index c539a1d8e75..2b88553bcdf 100644 --- a/Src/Base/AMReX.H +++ b/Src/Base/AMReX.H @@ -113,16 +113,15 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Error (const char* msg = nullptr) { -#if AMREX_DEVICE_COMPILE #if defined(NDEBUG) - amrex::ignore_unused(msg); + AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);)) #else - if (msg) { AMREX_DEVICE_PRINTF("Error %s\n", msg); } - AMREX_DEVICE_ASSERT(0); -#endif -#else - Error_host("Error", msg); + AMREX_IF_ON_DEVICE(( + if (msg) { AMREX_DEVICE_PRINTF("Error %s\n", msg); } + AMREX_DEVICE_ASSERT(0); + )) #endif + AMREX_IF_ON_HOST((Error_host("Error", msg);)) } //! Print out warning message to cerr. @@ -132,15 +131,12 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Warning (const char * msg) { -#if AMREX_DEVICE_COMPILE #if defined(NDEBUG) - amrex::ignore_unused(msg); -#else - if (msg) { AMREX_DEVICE_PRINTF("Warning %s\n", msg); } -#endif + AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);)) #else - Warning_host(msg); + AMREX_IF_ON_DEVICE((if (msg) { AMREX_DEVICE_PRINTF("Warning %s\n", msg); })) #endif + AMREX_IF_ON_HOST((Warning_host(msg);)) } //! Print out message to cerr and exit via abort(). @@ -148,16 +144,15 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Abort (const char * msg = nullptr) { -#if AMREX_DEVICE_COMPILE #if defined(NDEBUG) - amrex::ignore_unused(msg); + AMREX_IF_ON_DEVICE((amrex::ignore_unused(msg);)) #else - if (msg) { AMREX_DEVICE_PRINTF("Abort %s\n", msg); } - AMREX_DEVICE_ASSERT(0); -#endif -#else - Error_host("Abort", msg); + AMREX_IF_ON_DEVICE(( + if (msg) { AMREX_DEVICE_PRINTF("Abort %s\n", msg); } + AMREX_DEVICE_ASSERT(0); + )) #endif + AMREX_IF_ON_HOST((Error_host("Abort", msg);)) } /** @@ -170,22 +165,21 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Assert (const char* EX, const char* file, int line, const char* msg = nullptr) { -#if AMREX_DEVICE_COMPILE #if defined(NDEBUG) - amrex::ignore_unused(EX,file,line,msg); -#else - if (msg) { - AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d, Msg: %s", - EX, file, line, msg); - } else { - AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d", - EX, file, line); - } - AMREX_DEVICE_ASSERT(0); -#endif + AMREX_IF_ON_DEVICE((amrex::ignore_unused(EX,file,line,msg);)) #else - Assert_host(EX,file,line,msg); + AMREX_IF_ON_DEVICE(( + if (msg) { + AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d, Msg: %s", + EX, file, line, msg); + } else { + AMREX_DEVICE_PRINTF("Assertion `%s' failed, file \"%s\", line %d", + EX, file, line); + } + AMREX_DEVICE_ASSERT(0); + )) #endif + AMREX_IF_ON_HOST((Assert_host(EX,file,line,msg);)) } /** diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index b418f3cc1c0..31889df4425 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -161,51 +161,52 @@ namespace amrex AMREX_GPU_HOST_DEVICE ItType upper_bound (ItType first, ItType last, const ValType& val) { -#if AMREX_DEVICE_COMPILE - std::ptrdiff_t count = last-first; - while(count>0){ - auto it = first; - const auto step = count/2; - it += step; - if (!(val < *it)){ - first = ++it; - count -= step + 1; + AMREX_IF_ON_DEVICE(( + std::ptrdiff_t count = last-first; + while(count>0){ + auto it = first; + const auto step = count/2; + it += step; + if (!(val < *it)){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } } - else{ - count = 
step; - } - } - - return first; -#else - return std::upper_bound(first, last, val); -#endif + return first; + )) + AMREX_IF_ON_HOST(( + return std::upper_bound(first, last, val); + )) } template AMREX_GPU_HOST_DEVICE ItType lower_bound (ItType first, ItType last, const ValType& val) { -#ifdef AMREX_DEVICE_COMPILE - std::ptrdiff_t count = last-first; - while(count>0) - { - auto it = first; - const auto step = count/2; - it += step; - if (*it < val){ - first = ++it; - count -= step + 1; - } - else{ - count = step; + AMREX_IF_ON_DEVICE(( + std::ptrdiff_t count = last-first; + while(count>0) + { + auto it = first; + const auto step = count/2; + it += step; + if (*it < val){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } } - } - return first; -#else - return std::lower_bound(first, last, val); -#endif + return first; + )) + AMREX_IF_ON_HOST(( + return std::lower_bound(first, last, val); + )) } namespace detail { @@ -239,83 +240,100 @@ int builtin_clz_wrapper (clzll_tag, T x) noexcept return static_cast(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT)); } -#ifdef AMREX_USE_CUDA - -// likewise with CUDA, there are __clz functions that take (signed) int and long long int -template ::type> -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -int clz_wrapper (clz_tag, T x) noexcept -{ - return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT); -} - -template ::type> -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -int clz_wrapper (clzll_tag, T x) noexcept -{ - return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT); } -#endif -} +template ,std::uint8_t> || + std::is_same_v,std::uint16_t> || + std::is_same_v,std::uint32_t> || + std::is_same_v,std::uint64_t>, int> = 0> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int clz (T x) noexcept; AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint8_t x) noexcept +int clz_generic (std::uint8_t x) noexcept { -#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz - return detail::clz_wrapper(detail::clz_tag{}, x); -#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; auto upper = x >> 4; auto lower = x & 0xF; return upper ? clz_lookup[upper] : 4 + clz_lookup[lower]; -#endif } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint16_t x) noexcept +int clz_generic (std::uint16_t x) noexcept { -#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz - return detail::clz_wrapper(detail::clz_tag{}, x); -#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else auto upper = std::uint8_t(x >> 8); auto lower = std::uint8_t(x & 0xFF); return upper ? clz(upper) : 8 + clz(lower); -#endif } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint32_t x) noexcept +int clz_generic (std::uint32_t x) noexcept { -#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz - return detail::clz_wrapper(detail::clz_tag{}, x); -#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else auto upper = std::uint16_t(x >> 16); auto lower = std::uint16_t(x & 0xFFFF); return upper ? 
clz(upper) : 16 + clz(lower); -#endif } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint64_t x) noexcept +int clz_generic (std::uint64_t x) noexcept { -#if (AMREX_DEVICE_COMPILE && defined(AMREX_USE_CUDA)) // all supported cuda versions have __clz - return detail::clz_wrapper(detail::clz_tag{}, x); -#elif (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else auto upper = std::uint32_t(x >> 32); auto lower = std::uint32_t(x & 0xFFFFFFFF); return upper ? clz(upper) : 32 + clz(lower); +} + +#if defined AMREX_USE_CUDA + +namespace detail { + // likewise with CUDA, there are __clz functions that take (signed) int and long long int + template ::type> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + int clz_wrapper (clz_tag, T x) noexcept + { + return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } + + template ::type> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + int clz_wrapper (clzll_tag, T x) noexcept + { + return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } +} + +template ,std::uint8_t> || + std::is_same_v,std::uint16_t> || + std::is_same_v,std::uint32_t> || + std::is_same_v,std::uint64_t>, int> > +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int clz (T x) noexcept +{ + AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);)) +#if AMREX_HAS_BUILTIN_CLZ + AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);)) +#else + AMREX_IF_ON_HOST((return clz_generic(x);)) #endif } +#else // !defined AMREX_USE_CUDA + +template ,std::uint8_t> || + std::is_same_v,std::uint16_t> || + std::is_same_v,std::uint32_t> || + std::is_same_v,std::uint64_t>, int> > +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int clz (T x) noexcept +{ +#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) + return detail::builtin_clz_wrapper(detail::clz_tag{}, x); +#else + return clz_generic(x); +#endif +} + +#endif // defined AMREX_USE_CUDA + } #endif diff --git a/Src/Base/AMReX_Array4.H b/Src/Base/AMReX_Array4.H index b2ff0fcb549..8d7a4a44f30 100644 --- a/Src/Base/AMReX_Array4.H +++ b/Src/Base/AMReX_Array4.H @@ -42,13 +42,14 @@ namespace amrex { U& operator[] (int n) const noexcept { #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) if (n < 0 || n >= ncomp) { -#if AMREX_DEVICE_COMPILE - AMREX_DEVICE_PRINTF(" %d is out of bound (0:%d)", n, ncomp-1); -#else - std::stringstream ss; - ss << " " << n << " is out of bound: (0:" << ncomp-1 << ")"; - amrex::Abort(ss.str()); -#endif + AMREX_IF_ON_DEVICE(( + AMREX_DEVICE_PRINTF(" %d is out of bound (0:%d)", n, ncomp-1); + )) + AMREX_IF_ON_HOST(( + std::stringstream ss; + ss << " " << n << " is out of bound: (0:" << ncomp-1 << ")"; + amrex::Abort(ss.str()); + )) } #endif return p[n*stride]; @@ -233,21 +234,22 @@ namespace amrex { { if (i=end.x || j=end.y || k=end.z || n < 0 || n >= ncomp) { -#if AMREX_DEVICE_COMPILE - AMREX_DEVICE_PRINTF(" (%d,%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d,0:%d)\n", - i, j, k, n, begin.x, end.x-1, begin.y, end.y-1, - begin.z, end.z-1, ncomp-1); - amrex::Abort(); -#else - std::stringstream ss; - ss << " (" << i << "," << j << "," << k << "," << n - << ") is out of bound (" - << begin.x << ":" << end.x-1 << "," - << begin.y << ":" << end.y-1 << "," - << begin.z << ":" << end.z-1 << "," - << "0:" << ncomp-1 << ")"; - amrex::Abort(ss.str()); -#endif + AMREX_IF_ON_DEVICE(( + AMREX_DEVICE_PRINTF(" (%d,%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d,0:%d)\n", + i, j, k, n, begin.x, end.x-1, begin.y, end.y-1, + 
begin.z, end.z-1, ncomp-1); + amrex::Abort(); + )) + AMREX_IF_ON_HOST(( + std::stringstream ss; + ss << " (" << i << "," << j << "," << k << "," << n + << ") is out of bound (" + << begin.x << ":" << end.x-1 << "," + << begin.y << ":" << end.y-1 << "," + << begin.z << ":" << end.z-1 << "," + << "0:" << ncomp-1 << ")"; + amrex::Abort(ss.str()); + )) } } #endif diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index a8839a4bcc0..e507dab153b 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -198,11 +198,8 @@ struct MultiArray4 { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Array4 const& operator[] (int li) const noexcept { -#if AMREX_DEVICE_COMPILE - return dp[li]; -#else - return hp[li]; -#endif + AMREX_IF_ON_DEVICE((return dp[li];)) + AMREX_IF_ON_HOST((return hp[li];)) } #ifdef AMREX_USE_GPU diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index 55fc351156a..deea6ae932e 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -132,17 +132,17 @@ namespace detail { AMREX_GPU_DEVICE AMREX_FORCE_INLINE T Add_device (T* const sum, T const value) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicAdd(sum, value); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; sycl::atomic_ref a{*sum}; return a.fetch_add(value); #else - amrex::ignore_unused(sum, value); - return T(); // should never get here, but have to return something + AMREX_IF_ON_DEVICE(( return atomicAdd(sum, value); )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(sum, value); + return T(); // should never get here, but have to return something + )) #endif } @@ -175,7 +175,7 @@ namespace detail { #endif -#if defined(AMREX_USE_CUDA) && (__CUDA_ARCH__ < 600) +#if defined(AMREX_USE_CUDA) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600) AMREX_GPU_DEVICE AMREX_FORCE_INLINE double Add_device (double* const sum, double const value) noexcept @@ -195,17 +195,16 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Add (T* sum, T value) noexcept { -#if AMREX_DEVICE_COMPILE #ifdef AMREX_USE_SYCL - return Add_device(sum, value); -#else - return Add_device(sum, value); -#endif + AMREX_IF_ON_DEVICE((return Add_device(sum, value);)) #else - auto old = *sum; - *sum += value; - return old; + AMREX_IF_ON_DEVICE((return Add_device(sum, value);)) #endif + AMREX_IF_ON_HOST(( + auto old = *sum; + *sum += value; + return old; + )) } //////////////////////////////////////////////////////////////////////// @@ -252,18 +251,19 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool If (T* const add, T const value, Op&& op, Cond&& cond) noexcept { -#if AMREX_DEVICE_COMPILE - return If_device(add, value, std::forward(op), std::forward(cond)); -#else - T old = *add; - T const tmp = op(old, value); - if (cond(tmp)) { - *add = tmp; - return true; - } else { - return false; - } -#endif + AMREX_IF_ON_DEVICE(( + return If_device(add, value, std::forward(op), std::forward(cond)); + )) + AMREX_IF_ON_HOST(( + T old = *add; + T const tmp = op(old, value); + if (cond(tmp)) { + *add = tmp; + return true; + } else { + return false; + } + )) } //////////////////////////////////////////////////////////////////////// @@ -278,14 +278,11 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet (T* sum, T value) noexcept { -#if AMREX_DEVICE_COMPILE -#ifdef 
AMREX_USE_SYCL +#if defined(__SYCL_DEVICE_ONLY__) Add_device(sum, value); #else - Add_device(sum, value); -#endif -#else - *sum += value; + AMREX_IF_ON_DEVICE((Add_device(sum, value);)) + AMREX_IF_ON_HOST((*sum += value;)) #endif } @@ -293,14 +290,11 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet (float* const sum, float const value) noexcept { -#if AMREX_DEVICE_COMPILE #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" - atomicAddNoRet(sum, value); + AMREX_IF_ON_DEVICE((atomicAddNoRet(sum, value);)) #pragma clang diagnostic pop -#else - *sum += value; -#endif + AMREX_IF_ON_HOST((*sum += value;)) } #endif @@ -314,18 +308,18 @@ namespace detail { AMREX_GPU_DEVICE AMREX_FORCE_INLINE T Min_device (T* const m, T const value) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicMin(m, value); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; sycl::atomic_ref a{*m}; return a.fetch_min(value); #else - amrex::ignore_unused(m,value); - return T(); // should never get here, but have to return something + AMREX_IF_ON_DEVICE(( return atomicMin(m, value); )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(m,value); + return T(); // should never get here, but have to return something + )) #endif } @@ -357,13 +351,14 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Min (T* const m, T const value) noexcept { -#if AMREX_DEVICE_COMPILE - return Min_device(m, value); -#else - auto const old = *m; - *m = (*m) < value ? (*m) : value; - return old; -#endif + AMREX_IF_ON_DEVICE(( + return Min_device(m, value); + )) + AMREX_IF_ON_HOST(( + auto const old = *m; + *m = (*m) < value ? (*m) : value; + return old; + )) } //////////////////////////////////////////////////////////////////////// @@ -376,18 +371,18 @@ namespace detail { AMREX_GPU_DEVICE AMREX_FORCE_INLINE T Max_device (T* const m, T const value) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicMax(m, value); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; sycl::atomic_ref a{*m}; return a.fetch_max(value); #else - amrex::ignore_unused(m,value); - return T(); // should never get here, but have to return something + AMREX_IF_ON_DEVICE(( return atomicMax(m, value); )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(m,value); + return T(); // should never get here, but have to return something + )) #endif } @@ -419,13 +414,14 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Max (T* const m, T const value) noexcept { -#if AMREX_DEVICE_COMPILE - return Max_device(m, value); -#else - auto const old = *m; - *m = (*m) > value ? (*m) : value; - return old; -#endif + AMREX_IF_ON_DEVICE(( + return Max_device(m, value); + )) + AMREX_IF_ON_HOST(( + auto const old = *m; + *m = (*m) > value ? 
(*m) : value; + return old; + )) } //////////////////////////////////////////////////////////////////////// @@ -435,19 +431,21 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int LogicalOr (int* const m, int const value) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicOr(m, value); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; sycl::atomic_ref a{*m}; return a.fetch_or(value); #else - int const old = *m; - *m = (*m) || value; - return old; + AMREX_IF_ON_DEVICE(( + return atomicOr(m, value); + )) + AMREX_IF_ON_HOST(( + int const old = *m; + *m = (*m) || value; + return old; + )) #endif } @@ -458,19 +456,21 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int LogicalAnd (int* const m, int const value) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicAnd(m, value ? ~0x0 : 0); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; sycl::atomic_ref a{*m}; return a.fetch_and(value ? ~0x0 : 0); #else - int const old = *m; - *m = (*m) && value; - return old; + AMREX_IF_ON_DEVICE(( + return atomicAnd(m, value ? ~0x0 : 0); + )) + AMREX_IF_ON_HOST(( + int const old = *m; + *m = (*m) && value; + return old; + )) #endif } @@ -482,19 +482,21 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Exch (T* address, T val) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicExch(address, val); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; sycl::atomic_ref a{*address}; return a.exchange(val); #else - auto const old = *address; - *address = val; - return old; + AMREX_IF_ON_DEVICE(( + return atomicExch(address, val); + )) + AMREX_IF_ON_HOST(( + auto const old = *address; + *address = val; + return old; + )) #endif } @@ -506,10 +508,7 @@ namespace detail { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T CAS (T* const address, T compare, T const val) noexcept { // cannot be T const compare because of compare_exchange_strong -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return atomicCAS(address, compare, val); -#elif defined(__SYCL_DEVICE_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; @@ -517,9 +516,14 @@ namespace detail { a.compare_exchange_strong(compare, val); return compare; #else - auto const old = *address; - *address = (old == compare ? val : old); - return old; + AMREX_IF_ON_DEVICE(( + return atomicCAS(address, compare, val); + )) + AMREX_IF_ON_HOST(( + auto const old = *address; + *address = (old == compare ? 
val : old); + return old; + )) #endif } } @@ -527,17 +531,21 @@ namespace detail { namespace HostDevice::Atomic { template - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void Add (T* const sum, T const value) noexcept + AMREX_FORCE_INLINE + void Add_Host (T* const sum, T const value) noexcept { -#if AMREX_DEVICE_COMPILE - Gpu::Atomic::AddNoRet(sum,value); -#else #ifdef AMREX_USE_OMP #pragma omp atomic update #endif *sum += value; -#endif + } + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void Add (T* const sum, T const value) noexcept + { + AMREX_IF_ON_DEVICE((Gpu::Atomic::AddNoRet(sum,value);)) + AMREX_IF_ON_HOST((Add_Host(sum,value);)) } } diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H index c1870d2ef58..c4ba7dd86bc 100644 --- a/Src/Base/AMReX_GpuLaunch.H +++ b/Src/Base/AMReX_GpuLaunch.H @@ -103,20 +103,21 @@ namespace Gpu { inline Box getThreadBox (const Box& bx, Long offset) noexcept { -#if AMREX_DEVICE_COMPILE - const auto len = bx.length3d(); - Long k = offset / (len[0]*len[1]); - Long j = (offset - k*(len[0]*len[1])) / len[0]; - Long i = (offset - k*(len[0]*len[1])) - j*len[0]; - IntVect iv{AMREX_D_DECL(static_cast(i), - static_cast(j), - static_cast(k))}; - iv += bx.smallEnd(); - return (bx & Box(iv,iv,bx.type())); -#else - amrex::ignore_unused(offset); - return bx; -#endif + AMREX_IF_ON_DEVICE(( + const auto len = bx.length3d(); + Long k = offset / (len[0]*len[1]); + Long j = (offset - k*(len[0]*len[1])) / len[0]; + Long i = (offset - k*(len[0]*len[1])) - j*len[0]; + IntVect iv{AMREX_D_DECL(static_cast(i), + static_cast(j), + static_cast(k))}; + iv += bx.smallEnd(); + return (bx & Box(iv,iv,bx.type())); + )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(offset); + return bx; + )) } // ************************************************ diff --git a/Src/Base/AMReX_GpuQualifiers.H b/Src/Base/AMReX_GpuQualifiers.H index 1c0b5731762..4fba23a849a 100644 --- a/Src/Base/AMReX_GpuQualifiers.H +++ b/Src/Base/AMReX_GpuQualifiers.H @@ -8,6 +8,12 @@ #include #endif +#if defined(AMREX_USE_CUDA) && (defined(AMREX_CXX_PGI) || defined(AMREX_CXX_NVHPC)) +#include +#define AMREX_IF_ON_DEVICE(CODE) NV_IF_TARGET(NV_IS_DEVICE, CODE) +#define AMREX_IF_ON_HOST(CODE) NV_IF_TARGET(NV_IS_HOST, CODE) +#endif + #define AMREX_GPU_HOST __host__ #define AMREX_GPU_DEVICE __device__ #define AMREX_GPU_GLOBAL __global__ @@ -31,6 +37,29 @@ #define AMREX_DEVICE_COMPILE (__CUDA_ARCH__ || __HIP_DEVICE_COMPILE__ || __SYCL_DEVICE_ONLY__) +// Remove surrounding parentheses if present +#define AMREX_IMPL_STRIP_PARENS(X) AMREX_IMPL_ESC(AMREX_IMPL_ISH X) +#define AMREX_IMPL_ISH(...) AMREX_IMPL_ISH __VA_ARGS__ +#define AMREX_IMPL_ESC(...) AMREX_IMPL_ESC_(__VA_ARGS__) +#define AMREX_IMPL_ESC_(...) 
AMREX_IMPL_VAN_##__VA_ARGS__ +#define AMREX_IMPL_VAN_AMREX_IMPL_ISH + +#if !defined(AMREX_IF_ON_DEVICE) && !defined(AMREX_IF_ON_HOST) +#if (defined(AMREX_USE_CUDA) && defined(__CUDA_ARCH__)) || \ + (defined(AMREX_USE_HIP) && defined(__HIP_DEVICE_COMPILE__)) || \ + (defined(AMREX_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)) +#define AMREX_IF_ON_DEVICE(CODE) \ + { AMREX_IMPL_STRIP_PARENS(CODE) } +#define AMREX_IF_ON_HOST(CODE) \ + {} +#else +#define AMREX_IF_ON_DEVICE(CODE) \ + {} +#define AMREX_IF_ON_HOST(CODE) \ + { AMREX_IMPL_STRIP_PARENS(CODE) } +#endif +#endif + #ifdef AMREX_USE_SYCL # include #endif diff --git a/Src/Base/AMReX_GpuRange.H b/Src/Base/AMReX_GpuRange.H index b8d2ab89d08..be5071dbf8a 100644 --- a/Src/Base/AMReX_GpuRange.H +++ b/Src/Base/AMReX_GpuRange.H @@ -32,31 +32,31 @@ Long at (T const& /*b*/, Long offset) noexcept { return offset; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Long size (Box const& b) noexcept { -#if AMREX_DEVICE_COMPILE - return b.numPts(); -#else - amrex::ignore_unused(b); - return 1; -#endif + AMREX_IF_ON_DEVICE((return b.numPts();)) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(b); + return 1; + )) } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box at (Box const& b, Long offset) noexcept { -#if AMREX_DEVICE_COMPILE - auto len = b.length3d(); - Long k = offset / (len[0]*len[1]); - Long j = (offset - k*(len[0]*len[1])) / len[0]; - Long i = (offset - k*(len[0]*len[1])) - j*len[0]; - IntVect iv{AMREX_D_DECL(static_cast(i), - static_cast(j), - static_cast(k))}; - iv += b.smallEnd(); - return Box(iv,iv,b.type()); -#else - amrex::ignore_unused(offset); - return b; -#endif + AMREX_IF_ON_DEVICE(( + auto len = b.length3d(); + Long k = offset / (len[0]*len[1]); + Long j = (offset - k*(len[0]*len[1])) / len[0]; + Long i = (offset - k*(len[0]*len[1])) - j*len[0]; + IntVect iv{AMREX_D_DECL(static_cast(i), + static_cast(j), + static_cast(k))}; + iv += b.smallEnd(); + return Box(iv,iv,b.type()); + )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(offset); + return b; + )) } template @@ -92,13 +92,15 @@ struct range_impl [[nodiscard]] AMREX_GPU_HOST_DEVICE iterator begin () const noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return iterator(m_b, blockDim.x*blockIdx.x+threadIdx.x, blockDim.x*gridDim.x); -#elif defined (__SYCL_DEVICE_ONLY__) +#if defined (__SYCL_DEVICE_ONLY__) return iterator(m_b, m_gid, m_grange); #else - return iterator(m_b,0,1); + AMREX_IF_ON_DEVICE(( + return iterator(m_b, blockDim.x*blockIdx.x+threadIdx.x, blockDim.x*gridDim.x); + )) + AMREX_IF_ON_HOST(( + return iterator(m_b,0,1); + )) #endif } diff --git a/Src/Base/AMReX_GpuUtility.H b/Src/Base/AMReX_GpuUtility.H index ce98556fc5c..4adc111f5e2 100644 --- a/Src/Base/AMReX_GpuUtility.H +++ b/Src/Base/AMReX_GpuUtility.H @@ -26,8 +26,9 @@ namespace Gpu { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T LDG (Array4 const& a, int i, int j, int k) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) - return __ldg(a.ptr(i,j,k)); +#if defined(AMREX_USE_CUDA) + AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k));)) + AMREX_IF_ON_HOST((return a(i,j,k);)) #else return a(i,j,k); #endif @@ -36,8 +37,9 @@ namespace Gpu { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T LDG (Array4 const& a, int i, int j, int k, int n) noexcept { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) - return __ldg(a.ptr(i,j,k,n)); +#if defined(AMREX_USE_CUDA) + AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k,n));)) + AMREX_IF_ON_HOST((return 
a(i,j,k,n);)) #else return a(i,j,k,n); #endif diff --git a/Src/Base/AMReX_Math.H b/Src/Base/AMReX_Math.H index 769b9bf50f4..506289d03d5 100644 --- a/Src/Base/AMReX_Math.H +++ b/Src/Base/AMReX_Math.H @@ -68,11 +68,9 @@ double cospi (double x) { #if defined(AMREX_USE_SYCL) return sycl::cospi(x); -#elif defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return ::cospi(x); #else - return std::cos(pi()*x); + AMREX_IF_ON_DEVICE(( return ::cospi(x); )) + AMREX_IF_ON_HOST(( return std::cos(pi()*x); )) #endif } @@ -82,11 +80,9 @@ float cospi (float x) { #if defined(AMREX_USE_SYCL) return sycl::cospi(x); -#elif defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return ::cospif(x); #else - return std::cos(pi()*x); + AMREX_IF_ON_DEVICE(( return ::cospif(x); )) + AMREX_IF_ON_HOST(( return std::cos(pi()*x); )) #endif } @@ -96,11 +92,9 @@ double sinpi (double x) { #if defined(AMREX_USE_SYCL) return sycl::sinpi(x); -#elif defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return ::sinpi(x); #else - return std::sin(pi()*x); + AMREX_IF_ON_DEVICE(( return ::sinpi(x); )) + AMREX_IF_ON_HOST(( return std::sin(pi()*x); )) #endif } @@ -110,14 +104,32 @@ float sinpi (float x) { #if defined(AMREX_USE_SYCL) return sycl::sinpi(x); -#elif defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return ::sinpif(x); #else - return std::sin(pi()*x); + AMREX_IF_ON_DEVICE(( return ::sinpif(x); )) + AMREX_IF_ON_HOST(( return std::sin(pi()*x); )) #endif } +namespace detail { + AMREX_FORCE_INLINE void sincos (double x, double* sinx, double* cosx) { +#if defined(_GNU_SOURCE) && !defined(__APPLE__) + ::sincos(x, sinx, cosx); +#else + *sinx = std::sin(x); + *cosx = std::cos(x); +#endif + } + + AMREX_FORCE_INLINE void sincosf (float x, float* sinx, float* cosx) { +#if defined(_GNU_SOURCE) && !defined(__APPLE__) + ::sincosf(x, sinx, cosx); +#else + *sinx = std::sin(x); + *cosx = std::cos(x); +#endif + } +} + //! 
Return sine and cosine of given number AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::pair sincos (double x) @@ -125,13 +137,9 @@ std::pair sincos (double x) std::pair r; #if defined(AMREX_USE_SYCL) r.first = sycl::sincos(x, sycl::private_ptr(&r.second)); -#elif defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) || \ - (defined(_GNU_SOURCE) && !defined(__APPLE__)) - ::sincos(x, &r.first, &r.second); #else - r.first = std::sin(x); - r.second = std::cos(x); + AMREX_IF_ON_DEVICE(( ::sincos(x, &r.first, &r.second); )) + AMREX_IF_ON_HOST(( detail::sincos(x, &r.first, &r.second); )) #endif return r; } @@ -143,13 +151,9 @@ std::pair sincos (float x) std::pair r; #if defined(AMREX_USE_SYCL) r.first = sycl::sincos(x, sycl::private_ptr(&r.second)); -#elif defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) || \ - (defined(_GNU_SOURCE) && !defined(__APPLE__)) - ::sincosf(x, &r.first, &r.second); #else - r.first = std::sin(x); - r.second = std::cos(x); + AMREX_IF_ON_DEVICE(( ::sincosf(x, &r.first, &r.second); )) + AMREX_IF_ON_HOST(( detail::sincosf(x, &r.first, &r.second); )) #endif return r; } @@ -159,11 +163,11 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::pair sincospi (double x) { std::pair r; -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - ::sincospi(x, &r.first, &r.second); -#else +#if defined(AMREX_USE_SYCL) r = sincos(pi()*x); +#else + AMREX_IF_ON_DEVICE(( ::sincospi(x, &r.first, &r.second); )) + AMREX_IF_ON_HOST(( r = sincos(pi()*x); )) #endif return r; } @@ -173,11 +177,11 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::pair sincospi (float x) { std::pair r; -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) || \ - defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - ::sincospif(x, &r.first, &r.second); -#else +#if defined(AMREX_USE_SYCL) r = sincos(pi()*x); +#else + AMREX_IF_ON_DEVICE(( ::sincospif(x, &r.first, &r.second); )) + AMREX_IF_ON_HOST(( r = sincos(pi()*x); )) #endif return r; } diff --git a/Src/Base/AMReX_Random.H b/Src/Base/AMReX_Random.H index 675c12082d5..50b2c2693b0 100644 --- a/Src/Base/AMReX_Random.H +++ b/Src/Base/AMReX_Random.H @@ -23,24 +23,29 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real Random (RandomEngine const& random_engine) { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) -#ifdef BL_USE_FLOAT - return 1.0f - curand_uniform(random_engine.rand_state); +#if defined (__SYCL_DEVICE_ONLY__) + mkl::rng::device::uniform distr; + return mkl::rng::device::generate(distr, *random_engine.engine); #else - return 1.0 - curand_uniform_double(random_engine.rand_state); -#endif -#elif defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) #ifdef BL_USE_FLOAT - return 1.0f - hiprand_uniform(random_engine.rand_state); + AMREX_IF_ON_DEVICE(( + AMREX_HIP_OR_CUDA( + return 1.0f - hiprand_uniform(random_engine.rand_state); , + return 1.0f - curand_uniform(random_engine.rand_state); + ) + )) #else - return 1.0 - hiprand_uniform_double(random_engine.rand_state); + AMREX_IF_ON_DEVICE(( + AMREX_HIP_OR_CUDA( + return 1.0 - hiprand_uniform_double(random_engine.rand_state); , + return 1.0 - curand_uniform_double(random_engine.rand_state); + ) + )) #endif -#elif defined (__SYCL_DEVICE_ONLY__) - mkl::rng::device::uniform distr; - return mkl::rng::device::generate(distr, *random_engine.engine); -#else - amrex::ignore_unused(random_engine); - return 
Random(); + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); + return Random(); + )) #endif } @@ -56,24 +61,29 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real RandomNormal (Real mean, Real stddev, RandomEngine const& random_engine) { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) -#ifdef BL_USE_FLOAT - return stddev * curand_normal(random_engine.rand_state) + mean; +#if defined (__SYCL_DEVICE_ONLY__) + mkl::rng::device::gaussian distr(mean, stddev); + return mkl::rng::device::generate(distr, *random_engine.engine); #else - return stddev * curand_normal_double(random_engine.rand_state) + mean; -#endif -#elif defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) #ifdef BL_USE_FLOAT - return stddev * hiprand_normal(random_engine.rand_state) + mean; + AMREX_IF_ON_DEVICE(( + AMREX_HIP_OR_CUDA( + return stddev * hiprand_normal(random_engine.rand_state) + mean; , + return stddev * curand_normal(random_engine.rand_state) + mean; + ) + )) #else - return stddev * hiprand_normal_double(random_engine.rand_state) + mean; + AMREX_IF_ON_DEVICE(( + AMREX_HIP_OR_CUDA( + return stddev * hiprand_normal_double(random_engine.rand_state) + mean; , + return stddev * curand_normal_double(random_engine.rand_state) + mean; + ) + )) #endif -#elif defined (__SYCL_DEVICE_ONLY__) - mkl::rng::device::gaussian distr(mean, stddev); - return mkl::rng::device::generate(distr, *random_engine.engine); -#else - amrex::ignore_unused(random_engine); - return RandomNormal(mean, stddev); + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); + return RandomNormal(mean, stddev); + )) #endif } @@ -91,16 +101,20 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE unsigned int RandomPoisson (Real lambda, RandomEngine const& random_engine) { -#if defined(__CUDA_ARCH__) && defined(AMREX_USE_CUDA) - return curand_poisson(random_engine.rand_state, lambda); -#elif defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP) - return hiprand_poisson(random_engine.rand_state, lambda); -#elif defined (__SYCL_DEVICE_ONLY__) +#if defined (__SYCL_DEVICE_ONLY__) mkl::rng::device::poisson distr(lambda); return mkl::rng::device::generate(distr, *random_engine.engine); #else - amrex::ignore_unused(random_engine); - return RandomPoisson(lambda); + AMREX_IF_ON_DEVICE(( + AMREX_HIP_OR_CUDA( + return hiprand_poisson(random_engine.rand_state, lambda); , + return curand_poisson(random_engine.rand_state, lambda); + ) + )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); + return RandomPoisson(lambda); + )) #endif } @@ -116,22 +130,23 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE unsigned int Random_int (unsigned int n, RandomEngine const& random_engine) { -#if AMREX_DEVICE_COMPILE #if defined(__SYCL_DEVICE_ONLY__) mkl::rng::device::uniform distr(0,n); return mkl::rng::device::generate(distr, *random_engine.engine); #else - unsigned int rand; - constexpr unsigned int RAND_M = 4294967295; // 2**32-1 - do { - AMREX_HIP_OR_CUDA( rand = hiprand(random_engine.rand_state);, - rand = curand(random_engine.rand_state) ); - } while (rand > (RAND_M - RAND_M % n)); - return rand % n; -#endif -#else - amrex::ignore_unused(random_engine); - return Random_int(n); + AMREX_IF_ON_DEVICE(( + unsigned int rand; + constexpr unsigned int RAND_M = 4294967295; // 2**32-1 + do { + AMREX_HIP_OR_CUDA( rand = hiprand(random_engine.rand_state);, + rand = curand(random_engine.rand_state) ); + } while (rand > (RAND_M - RAND_M % n)); + return rand % n; + )) + AMREX_IF_ON_HOST(( + amrex::ignore_unused(random_engine); 
+ return Random_int(n); + )) #endif } diff --git a/Src/Base/AMReX_TableData.H b/Src/Base/AMReX_TableData.H index b7572e2a1cf..842225e53f4 100644 --- a/Src/Base/AMReX_TableData.H +++ b/Src/Base/AMReX_TableData.H @@ -57,16 +57,17 @@ struct Table1D void index_assert (int i) const { if (i < begin || i >= end) { -#if AMREX_DEVICE_COMPILE - AMREX_DEVICE_PRINTF(" (%d) is out of bound (%d:%d)\n", - i, begin, end-1); - amrex::Abort(); -#else - std::stringstream ss; - ss << " (" << i << ") is out of bound (" - << begin << ":" << end-1 << ")"; - amrex::Abort(ss.str()); -#endif + AMREX_IF_ON_DEVICE(( + AMREX_DEVICE_PRINTF(" (%d) is out of bound (%d:%d)\n", + i, begin, end-1); + amrex::Abort(); + )) + AMREX_IF_ON_HOST(( + std::stringstream ss; + ss << " (" << i << ") is out of bound (" + << begin << ":" << end-1 << ")"; + amrex::Abort(ss.str()); + )) } } #endif @@ -120,17 +121,18 @@ struct Table2D { if (i < begin[0] || i >= end[0] || j < begin[1] || j >= end[1]) { -#if AMREX_DEVICE_COMPILE - AMREX_DEVICE_PRINTF(" (%d,%d) is out of bound (%d:%d,%d:%d)\n", - i, j, begin[0], end[0]-1, begin[1], end[1]-1); - amrex::Abort(); -#else - std::stringstream ss; - ss << " (" << i << "," << j << ") is out of bound (" - << begin[0] << ":" << end[0]-1 - << "," << begin[1] << ":" << end[1]-1 << ")"; - amrex::Abort(ss.str()); -#endif + AMREX_IF_ON_DEVICE(( + AMREX_DEVICE_PRINTF(" (%d,%d) is out of bound (%d:%d,%d:%d)\n", + i, j, begin[0], end[0]-1, begin[1], end[1]-1); + amrex::Abort(); + )) + AMREX_IF_ON_HOST(( + std::stringstream ss; + ss << " (" << i << "," << j << ") is out of bound (" + << begin[0] << ":" << end[0]-1 + << "," << begin[1] << ":" << end[1]-1 << ")"; + amrex::Abort(ss.str()); + )) } } #endif @@ -188,19 +190,20 @@ struct Table3D if (i < begin[0] || i >= end[0] || j < begin[1] || j >= end[1] || k < begin[2] || k >= end[2]) { -#if AMREX_DEVICE_COMPILE - AMREX_DEVICE_PRINTF(" (%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d)\n", - i, j, k, begin[0], end[0]-1, begin[1], end[1]-1, - begin[2], end[2]-1); - amrex::Abort(); -#else - std::stringstream ss; - ss << " (" << i << "," << j << "," << k << ") is out of bound (" - << begin[0] << ":" << end[0]-1 - << "," << begin[1] << ":" << end[1]-1 - << "," << begin[2] << ":" << end[2]-1 << ")"; - amrex::Abort(ss.str()); -#endif + AMREX_IF_ON_DEVICE(( + AMREX_DEVICE_PRINTF(" (%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d)\n", + i, j, k, begin[0], end[0]-1, begin[1], end[1]-1, + begin[2], end[2]-1); + amrex::Abort(); + )) + AMREX_IF_ON_HOST(( + std::stringstream ss; + ss << " (" << i << "," << j << "," << k << ") is out of bound (" + << begin[0] << ":" << end[0]-1 + << "," << begin[1] << ":" << end[1]-1 + << "," << begin[2] << ":" << end[2]-1 << ")"; + amrex::Abort(ss.str()); + )) } } #endif @@ -262,20 +265,21 @@ struct Table4D j < begin[1] || j >= end[1] || k < begin[2] || k >= end[2] || n < begin[3] || n >= end[3]) { -#if AMREX_DEVICE_COMPILE - AMREX_DEVICE_PRINTF(" (%d,%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d,%d:%d)\n", - i, j, k, n, begin[0], end[0]-1, begin[1], end[1]-1, - begin[2], end[2]-1, begin[3], end[3]-1); - amrex::Abort(); -#else - std::stringstream ss; - ss << " (" << i << "," << j << "," << k << "," << n << ") is out of bound (" - << begin[0] << ":" << end[0]-1 - << "," << begin[1] << ":" << end[1]-1 - << "," << begin[2] << ":" << end[2]-1 - << "," << begin[3] << ":" << end[3]-1 << ")"; - amrex::Abort(ss.str()); -#endif + AMREX_IF_ON_DEVICE(( + AMREX_DEVICE_PRINTF(" (%d,%d,%d,%d) is out of bound (%d:%d,%d:%d,%d:%d,%d:%d)\n", + i, j, k, n, begin[0], 
end[0]-1, begin[1], end[1]-1, + begin[2], end[2]-1, begin[3], end[3]-1); + amrex::Abort(); + )) + AMREX_IF_ON_HOST(( + std::stringstream ss; + ss << " (" << i << "," << j << "," << k << "," << n << ") is out of bound (" + << begin[0] << ":" << end[0]-1 + << "," << begin[1] << ":" << end[1]-1 + << "," << begin[2] << ":" << end[2]-1 + << "," << begin[3] << ":" << end[3]-1 << ")"; + amrex::Abort(ss.str()); + )) } } #endif diff --git a/Src/Base/Parser/AMReX_IParser.H b/Src/Base/Parser/AMReX_IParser.H index 69f40252b0c..025da853c28 100644 --- a/Src/Base/Parser/AMReX_IParser.H +++ b/Src/Base/Parser/AMReX_IParser.H @@ -20,11 +20,8 @@ struct IParserExecutor [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int operator() () const noexcept { -#if AMREX_DEVICE_COMPILE - return iparser_exe_eval(m_device_executor, nullptr); -#else - return iparser_exe_eval(m_host_executor, nullptr); -#endif + AMREX_IF_ON_DEVICE((return iparser_exe_eval(m_device_executor, nullptr);)) + AMREX_IF_ON_HOST((return iparser_exe_eval(m_host_executor, nullptr);)) } template @@ -33,30 +30,21 @@ struct IParserExecutor operator() (Ts... var) const noexcept { amrex::GpuArray l_var{var...}; -#if AMREX_DEVICE_COMPILE - return iparser_exe_eval(m_device_executor, l_var.data()); -#else - return iparser_exe_eval(m_host_executor, l_var.data()); -#endif + AMREX_IF_ON_DEVICE((return iparser_exe_eval(m_device_executor, l_var.data());)) + AMREX_IF_ON_HOST((return iparser_exe_eval(m_host_executor, l_var.data());)) } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int operator() (GpuArray const& var) const noexcept { -#if AMREX_DEVICE_COMPILE - return iparser_exe_eval(m_device_executor, var.data()); -#else - return iparser_exe_eval(m_host_executor, var.data()); -#endif + AMREX_IF_ON_DEVICE((return iparser_exe_eval(m_device_executor, var.data());)) + AMREX_IF_ON_HOST((return iparser_exe_eval(m_host_executor, var.data());)) } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE explicit operator bool () const { -#if AMREX_DEVICE_COMPILE - return m_device_executor != nullptr; -#else - return m_host_executor != nullptr; -#endif + AMREX_IF_ON_DEVICE((return m_device_executor != nullptr;)) + AMREX_IF_ON_HOST((return m_host_executor != nullptr;)) } char* m_host_executor = nullptr; diff --git a/Src/Base/Parser/AMReX_Parser.H b/Src/Base/Parser/AMReX_Parser.H index b74de941950..456910f8736 100644 --- a/Src/Base/Parser/AMReX_Parser.H +++ b/Src/Base/Parser/AMReX_Parser.H @@ -21,11 +21,8 @@ struct ParserExecutor [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE double operator() () const noexcept { -#if AMREX_DEVICE_COMPILE - return parser_exe_eval(m_device_executor, nullptr); -#else - return parser_exe_eval(m_host_executor, nullptr); -#endif + AMREX_IF_ON_DEVICE((return parser_exe_eval(m_device_executor, nullptr);)) + AMREX_IF_ON_HOST((return parser_exe_eval(m_host_executor, nullptr);)) } template @@ -34,11 +31,8 @@ struct ParserExecutor operator() (Ts... var) const noexcept { amrex::GpuArray l_var{var...}; -#if AMREX_DEVICE_COMPILE - return parser_exe_eval(m_device_executor, l_var.data()); -#else - return parser_exe_eval(m_host_executor, l_var.data()); -#endif + AMREX_IF_ON_DEVICE((return parser_exe_eval(m_device_executor, l_var.data());)) + AMREX_IF_ON_HOST((return parser_exe_eval(m_host_executor, l_var.data());)) } template @@ -47,30 +41,21 @@ struct ParserExecutor operator() (Ts... 
var) const noexcept { amrex::GpuArray l_var{var...}; -#if AMREX_DEVICE_COMPILE - return static_cast(parser_exe_eval(m_device_executor, l_var.data())); -#else - return static_cast(parser_exe_eval(m_host_executor, l_var.data())); -#endif + AMREX_IF_ON_DEVICE((return static_cast(parser_exe_eval(m_device_executor, l_var.data()));)) + AMREX_IF_ON_HOST((return static_cast(parser_exe_eval(m_host_executor, l_var.data()));)) } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE double operator() (GpuArray const& var) const noexcept { -#if AMREX_DEVICE_COMPILE - return parser_exe_eval(m_device_executor, var.data()); -#else - return parser_exe_eval(m_host_executor, var.data()); -#endif + AMREX_IF_ON_DEVICE((return parser_exe_eval(m_device_executor, var.data());)) + AMREX_IF_ON_HOST((return parser_exe_eval(m_host_executor, var.data());)) } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE explicit operator bool () const { -#if AMREX_DEVICE_COMPILE - return m_device_executor != nullptr; -#else - return m_host_executor != nullptr; -#endif + AMREX_IF_ON_DEVICE((return m_device_executor != nullptr;)) + AMREX_IF_ON_HOST((return m_host_executor != nullptr;)) } char* m_host_executor = nullptr; diff --git a/Src/Base/Parser/AMReX_Parser_Y.H b/Src/Base/Parser/AMReX_Parser_Y.H index 792f796554e..e84cf9e0d59 100644 --- a/Src/Base/Parser/AMReX_Parser_Y.H +++ b/Src/Base/Parser/AMReX_Parser_Y.H @@ -350,7 +350,7 @@ template AMREX_GPU_HOST_DEVICE AMREX_NO_INLINE T parser_math_comp_ellint_1 (T a) { -#if defined(__GNUC__) && !defined(__clang__) && !defined(__CUDA_ARCH__) +#if defined(__GNUC__) && !defined(__clang__) && !defined(__CUDA_ARCH__) && !defined(__NVCOMPILER) return std::comp_ellint_1(a); #else amrex::ignore_unused(a); @@ -363,7 +363,7 @@ template AMREX_GPU_HOST_DEVICE AMREX_NO_INLINE T parser_math_comp_ellint_2 (T a) { -#if defined(__GNUC__) && !defined(__clang__) && !defined(__CUDA_ARCH__) +#if defined(__GNUC__) && !defined(__clang__) && !defined(__CUDA_ARCH__) && !defined(__NVCOMPILER) return std::comp_ellint_2(a); #else amrex::ignore_unused(a); diff --git a/Src/EB/AMReX_EB2_GeometryShop.H b/Src/EB/AMReX_EB2_GeometryShop.H index ee353c13952..33931b28c5f 100644 --- a/Src/EB/AMReX_EB2_GeometryShop.H +++ b/Src/EB/AMReX_EB2_GeometryShop.H @@ -28,13 +28,12 @@ AMREX_GPU_HOST_DEVICE Real IF_f (F const& f, GpuArray const& p) noexcept { -#if AMREX_DEVICE_COMPILE - amrex::ignore_unused(f,p); - amrex::Error("EB2::GeometryShop: how did this happen?"); - return 0.0; -#else - return f({AMREX_D_DECL(p[0],p[1],p[2])}); -#endif + AMREX_IF_ON_DEVICE(( + amrex::ignore_unused(f,p); + amrex::Error("EB2::GeometryShop: how did this happen?"); + return 0.0; + )) + AMREX_IF_ON_HOST((return f({AMREX_D_DECL(p[0],p[1],p[2])});)) } template From 6e2b831245f6fdac9a714c64417bd5ab21fb613d Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Mon, 13 Nov 2023 15:00:36 -0800 Subject: [PATCH 033/186] solve_bicgstab: use linop.make instead of MF constructor (#3619) ## Summary This PR replaces the explicit use of MF constructors in ```MLCGSolverT::solve_bicgstab``` with calls to the `make` method of the linear operator associated with the MLCGSolverT object. ## Additional background The use of `MLLinOpT::make` allows for inheritance of MLCGSolverT without an override of `solve_bicgstab` even if the MF class lacks a constructor with the same arguments as those MultiFab. For the MLMG template classes, `make` should generally be used instead of explicit MF constructors. 
Another PR to change this in `solve_cg` will follow once this is fully vetted and approved. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 3764fa38f8a..fce7b1d5005 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -90,22 +90,18 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) const int ncomp = sol.nComp(); - const BoxArray& ba = sol.boxArray(); - const DistributionMapping& dm = sol.DistributionMap(); - const auto& factory = sol.Factory(); - - MF ph(ba, dm, ncomp, sol.nGrowVect(), MFInfo(), factory); - MF sh(ba, dm, ncomp, sol.nGrowVect(), MFInfo(), factory); + MF ph = Lp.make(amrlev, mglev, sol.nGrowVect()); + MF sh = Lp.make(amrlev, mglev, sol.nGrowVect()); ph.setVal(RT(0.0)); sh.setVal(RT(0.0)); - MF sorig(ba, dm, ncomp, nghost, MFInfo(), factory); - MF p (ba, dm, ncomp, nghost, MFInfo(), factory); - MF r (ba, dm, ncomp, nghost, MFInfo(), factory); - MF s (ba, dm, ncomp, nghost, MFInfo(), factory); - MF rh (ba, dm, ncomp, nghost, MFInfo(), factory); - MF v (ba, dm, ncomp, nghost, MFInfo(), factory); - MF t (ba, dm, ncomp, nghost, MFInfo(), factory); + MF sorig = Lp.make(amrlev, mglev, nghost); + MF p = Lp.make(amrlev, mglev, nghost); + MF r = Lp.make(amrlev, mglev, nghost); + MF s = Lp.make(amrlev, mglev, nghost); + MF rh = Lp.make(amrlev, mglev, nghost); + MF v = Lp.make(amrlev, mglev, nghost); + MF t = Lp.make(amrlev, mglev, nghost); Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); From fa3743fd1fdd5e3f1b12d431b0f6bb4b15bb7b95 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 13 Nov 2023 15:00:56 -0800 Subject: [PATCH 034/186] CArena: shrink_in_place and operator<< (#3621) ## Summary Implement CArena::shrink_in_place, which is used by PODVector::shrink_to_fit. It avoids a new memory allocation and data movement. Add operator<< to CArena. This helps debugging. ## Additional background Follow-up on #3426. ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_CArena.H | 19 ++++---- Src/Base/AMReX_CArena.cpp | 96 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 10 deletions(-) diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H index d68285bc878..163039df2ef 100644 --- a/Src/Base/AMReX_CArena.H +++ b/Src/Base/AMReX_CArena.H @@ -5,13 +5,14 @@ #include #include -#include -#include +#include +#include #include #include -#include -#include +#include #include +#include +#include namespace amrex { @@ -57,7 +58,7 @@ public: * Try to shrink in-place */ [[nodiscard]] void* - shrink_in_place (void* pt, std::size_t sz) final; + shrink_in_place (void* pt, std::size_t new_size) final; /** * \brief Free up allocated memory. Merge neighboring free memory chunks @@ -164,15 +165,15 @@ protected: MemStat* m_stat; }; + //! The list of blocks allocated via ::operator new(). + std::vector > m_alloc; + /** * \brief The type of our freelist and blocklist. * We use a set sorted from lo to hi memory addresses. */ using NL = std::set; - //! 
The list of blocks allocated via ::operator new(). - std::vector > m_alloc; - /** * \brief The free list of allocated but not currently used blocks. * Maintained in lo to hi memory sorted order. @@ -198,6 +199,8 @@ protected: std::mutex carena_mutex; + + friend std::ostream& operator<< (std::ostream& os, const CArena& arena); }; } diff --git a/Src/Base/AMReX_CArena.cpp b/Src/Base/AMReX_CArena.cpp index 6f7979d4750..c47f8f5ed26 100644 --- a/Src/Base/AMReX_CArena.cpp +++ b/Src/Base/AMReX_CArena.cpp @@ -14,6 +14,7 @@ namespace amrex { #include #include +#include namespace amrex { @@ -203,9 +204,61 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax) } void* -CArena::shrink_in_place (void* /*pt*/, std::size_t sz) +CArena::shrink_in_place (void* pt, std::size_t new_size) { - return alloc(sz); // xxxxx TODO + if ((pt == nullptr) || (new_size == 0)) { return nullptr; } + + new_size = Arena::align(new_size); + + std::lock_guard lock(carena_mutex); + + auto busy_it = m_busylist.find(Node(pt,nullptr,0)); + if (busy_it == m_busylist.end()) { + amrex::Abort("CArena::shrink_in_place: unknown pointer"); + return nullptr; + } + AMREX_ASSERT(m_freelist.find(*busy_it) == m_freelist.end()); + + auto const old_size = busy_it->size(); + + if (new_size > old_size) { + amrex::Abort("CArena::shrink_in_place: wrong size. Cannot shrink to a larger size."); + return nullptr; + } else if (new_size == old_size) { + return pt; + } else { + auto const leftover_size = old_size - new_size; + + void* pt2 = static_cast(pt) + new_size; + Node new_free_node(pt2, busy_it->owner(), leftover_size); + + void* pt_end = static_cast(pt) + old_size; + auto free_it = m_freelist.find(Node(pt_end,nullptr,0)); + if ((free_it == m_freelist.end()) || ! new_free_node.coalescable(*free_it)) { + m_freelist.insert(free_it, new_free_node); + } else { + auto& node = const_cast(*free_it); + // This is safe because the free list is std::set and the + // modification of `block` does not change the order of elements + // in the container, even though Node's operator< uses block. 
+ node.block(pt2); + node.size(leftover_size + node.size()); + } + + const_cast(*busy_it).size(new_size); + + m_actually_used -= leftover_size; + +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) { + TinyProfiler::memory_free(old_size, busy_it->mem_stat()); + auto* stat = TinyProfiler::memory_alloc(new_size, m_profiling_stats); + const_cast(*busy_it).mem_stat(stat); + } +#endif + + return pt; + } } void @@ -439,4 +492,43 @@ CArena::PrintUsage (std::ostream& os, std::string const& name, std::string const << m_busylist.size() << " busy blocks, " << m_freelist.size() << " free blocks\n"; } +std::ostream& operator<< (std::ostream& os, const CArena& arena) +{ + os << "CArea:\n" + << " Hunk size: " << arena.m_hunk << "\n" + << " Memory allocated: " << arena.m_used << "\n" + << " Memory actually used: " << arena.m_actually_used << "\n"; + + if (arena.m_alloc.empty()) { + os << " No memory allocations\n"; + } else { + os << " List of memory alloations: (address, size)\n"; + for (auto const& a : arena.m_alloc) { + os << " " << a.first << ", " << a.second << "\n"; + } + } + + if (arena.m_freelist.empty()) { + os << " No free nodes\n"; + } else { + os << " List of free nodes: (address, owner, size)\n"; + for (auto const& a : arena.m_freelist) { + os << " " << a.block() << ", " << a.owner() << ", " + << a.size() << "\n"; + } + } + + if (arena.m_busylist.empty()) { + os << " No busy nodes\n"; + } else { + os << " List of busy nodes: (address, owner, size)\n"; + for (auto const& a : arena.m_busylist) { + os << " " << a.block() << ", " << a.owner() << ", " + << a.size() << "\n"; + } + } + + return os; +} + } From b7408ea6e8feca7eab2f7cf30606d066b6699814 Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Mon, 13 Nov 2023 18:56:02 -0800 Subject: [PATCH 035/186] solve_cg: use linop.make instead of MF constructor (#3627) ## Summary This PR replaces the explicit use of `MF` constructors in `MLCGSolverT::solve_cg` with calls to the make method of the linear operator associated with the `MLCGSolverT` object. ## Additional background This is a similar to the PR on `solve_bicgstab`. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index fce7b1d5005..c99d7b319bd 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -261,17 +261,13 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) const int ncomp = sol.nComp(); - const BoxArray& ba = sol.boxArray(); - const DistributionMapping& dm = sol.DistributionMap(); - const auto& factory = sol.Factory(); - - MF p(ba, dm, ncomp, sol.nGrowVect(), MFInfo(), factory); + MF p = Lp.make(amrlev, mglev, sol.nGrowVect()); p.setVal(RT(0.0)); - MF sorig(ba, dm, ncomp, nghost, MFInfo(), factory); - MF r (ba, dm, ncomp, nghost, MFInfo(), factory); - MF z (ba, dm, ncomp, nghost, MFInfo(), factory); - MF q (ba, dm, ncomp, nghost, MFInfo(), factory); + MF sorig = Lp.make(amrlev, mglev, nghost); + MF r = Lp.make(amrlev, mglev, nghost); + MF z = Lp.make(amrlev, mglev, nghost); + MF q = Lp.make(amrlev, mglev, nghost); sorig.LocalCopy(sol,0,0,ncomp,nghost); From af1e1be8d7d41de9b999b4973b2024281a28f23e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 14 Nov 2023 11:13:47 -0800 Subject: [PATCH 036/186] Plotfile Tools: GPU support (#3626) ## Summary Add GPU support for fcompare, fextrema, fsnapshot and fvolumesum. 
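For context, the ports below replace per-cell host loops with `ParReduce`/`ParallelFor`. The following is a minimal sketch of that pattern (not code from this patch), assuming a filled single-component `MultiFab mf`; the real `fextrema.cpp` additionally skips cells covered by a finer level via an `iMultiFab` mask, and the helper name `plotfile_minmax` is made up for illustration.

```cpp
#include <utility>
#include <AMReX_MultiFab.H>
#include <AMReX_TypeList.H>
#include <AMReX_ParReduce.H>

// Min/max of component 0 of mf over all local boxes, on GPU or CPU.
// (Any cross-MPI-rank reduction would still be done by the caller.)
std::pair<amrex::Real, amrex::Real> plotfile_minmax (amrex::MultiFab const& mf)
{
    using namespace amrex;
    auto const& ma = mf.const_arrays();
    auto rr = ParReduce(TypeList<ReduceOpMin,ReduceOpMax>{},
                        TypeList<Real,Real>{}, mf,
        [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) -> GpuTuple<Real,Real>
        {
            Real x = ma[bno](i,j,k);
            return {x, x};
        });
    return {amrex::get<0>(rr), amrex::get<1>(rr)};
}
```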
## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_PlotFileDataImpl.cpp | 2 +- Tools/Plotfile/fextrema.cpp | 35 ++++---- Tools/Plotfile/fsnapshot.cpp | 4 +- Tools/Plotfile/fvolumesum.cpp | 130 ++++++++++------------------ 4 files changed, 66 insertions(+), 105 deletions(-) diff --git a/Src/Base/AMReX_PlotFileDataImpl.cpp b/Src/Base/AMReX_PlotFileDataImpl.cpp index 1fbf5044a50..b85c17ad93c 100644 --- a/Src/Base/AMReX_PlotFileDataImpl.cpp +++ b/Src/Base/AMReX_PlotFileDataImpl.cpp @@ -141,7 +141,7 @@ PlotFileDataImpl::get (int level, std::string const& varname) noexcept int gid = mfi.index(); FArrayBox& dstfab = mf[mfi]; std::unique_ptr srcfab(m_vismf[level]->readFAB(gid, icomp)); - dstfab.copy(*srcfab); + dstfab.copy(*srcfab); } } return mf; diff --git a/Tools/Plotfile/fextrema.cpp b/Tools/Plotfile/fextrema.cpp index 44cfaf161d4..55596bf952d 100644 --- a/Tools/Plotfile/fextrema.cpp +++ b/Tools/Plotfile/fextrema.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -80,23 +81,23 @@ void main_main() pf.boxArray(ilev+1), ratio); for (int ivar = 0; ivar < var_names.size(); ++ivar) { const MultiFab& mf = pf.get(ilev, var_names[ivar]); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - const auto& ifab = mask.array(mfi); - const auto& fab = mf.array(mfi); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - if (ifab(i,j,k) == 0) { - vvmin[ivar] = std::min(fab(i,j,k),vvmin[ivar]); - vvmax[ivar] = std::max(fab(i,j,k),vvmax[ivar]); - } - } - } - } - } + auto const& ma = mf.const_arrays(); + auto const& ima = mask.const_arrays(); + auto rr = ParReduce(TypeList{}, + TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + if (ima[bno](i,j,k) == 0) { + auto x = ma[bno](i,j,k); + return {x,x}; + } else { + return {std::numeric_limits::max(), + std::numeric_limits::lowest()}; + } + }); + vvmin[ivar] = std::min(amrex::get<0>(rr), vvmin[ivar]); + vvmax[ivar] = std::max(amrex::get<1>(rr), vvmax[ivar]); } } } diff --git a/Tools/Plotfile/fsnapshot.cpp b/Tools/Plotfile/fsnapshot.cpp index e68f8a33b6d..c4b9fd3f361 100644 --- a/Tools/Plotfile/fsnapshot.cpp +++ b/Tools/Plotfile/fsnapshot.cpp @@ -278,7 +278,7 @@ void main_main() gmx = std::log10(gmx); } - BaseFab intdat; + BaseFab intdat(The_Pinned_Arena()); for (int idir = ndir_begin; idir < ndir_end; ++idir) { intdat.resize(finebox[idir],1); const int width = (idir == 0) ? finebox[idir].length(1) : finebox[idir].length(0); @@ -286,7 +286,7 @@ void main_main() const auto& intarr = intdat.array(); const auto& realarr = datamf[idir].array(0); Real fac = Real(253.999) / (gmx-gmn); - amrex::LoopOnCpu(finebox[idir], [=] (int i, int j, int k) + amrex::ParallelFor(finebox[idir], [=] AMREX_GPU_DEVICE (int i, int j, int k) { int jj = (idir == 2) ? height - 1 - j : j; // flip the data in second image direction int kk = (idir == 2) ? 
k : height - 1 - k; diff --git a/Tools/Plotfile/fvolumesum.cpp b/Tools/Plotfile/fvolumesum.cpp index 2f9f03cc522..ec6e461dcc7 100644 --- a/Tools/Plotfile/fvolumesum.cpp +++ b/Tools/Plotfile/fvolumesum.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,6 @@ void main_main() const int dim = pf.spaceDim(); - int fine_level = pf.finestLevel(); Vector pos; @@ -95,6 +95,35 @@ void main_main() Array dx = pf.cellSize(ilev); + Real volfac = AMREX_D_TERM(dx[0], *dx[1], *dx[2]); + + if (coord == 1) { + AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 2); + // axisymmetric V = pi (r_r**2 - r_l**2) * dz + // = pi dr * dz * (r_r + r_l) + // = 2 pi r dr dz + volfac *= 2 * pi; // 2 * pi * dr * dz part here + } else if (coord == 2) { + AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 1); + // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) + volfac *= (4.0_rt/3.0_rt) * pi; // 4/3 * pi * dr part here + } + + auto xlo = problo[0]; + auto dx0 = dx[0]; + AMREX_ASSERT(coord == 0 || coord == 1 || coord == 2); + auto f_vol = [=] AMREX_GPU_DEVICE (int i) { + if (coord == 0) { + return volfac; + } else if (coord == 1) { + return volfac * (xlo + (Real(i)+0.5_rt)*dx0); + } else { + Real r_r = xlo + Real(i+1)*dx0; + Real r_l = xlo + Real(i )*dx0; + return volfac * (r_r*r_r + r_l*r_r + r_l*r_l); + } + }; + if (ilev < fine_level) { IntVect ratio{pf.refRatio(ilev)}; for (int idim = dim; idim < AMREX_SPACEDIM; ++idim) { @@ -103,97 +132,28 @@ void main_main() const iMultiFab mask = makeFineMask(pf.boxArray(ilev), pf.DistributionMap(ilev), pf.boxArray(ilev+1), ratio); const MultiFab& mf = pf.get(ilev, var_name); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - if (bx.ok()) { - const auto& m = mask.array(mfi); - const auto& fab = mf.array(mfi); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - if (m(i,j,k) == 0) { // not covered by fine - Array p - = {AMREX_D_DECL(problo[0]+static_cast(i+0.5)*dx[0], - problo[1]+static_cast(j+0.5)*dx[1], - problo[2]+static_cast(k+0.5)*dx[2])}; - - // compute the volume - Real vol = std::numeric_limits::quiet_NaN(); - if (coord == 0) { - // Cartesian - vol = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - vol *= dx[idim]; - } - } else if (coord == 1) { - // axisymmetric V = pi (r_r**2 - r_l**2) * dz - // = pi dr * dz * (r_r + r_l) - // = 2 pi r dr dz - vol = 2 * pi * p[0] * dx[0] * dx[1]; - } else if (coord == 2) { - // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) - Real r_r = problo[0]+static_cast(i+1)*dx[0]; - Real r_l = problo[0]+static_cast(i)*dx[0]; - vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l); - } - - lsum += fab(i,j,k) * vol; - } - } - } - } - } - } + auto const& ima = mask.const_arrays(); + auto const& ma = mf.const_arrays(); + lsum += ParReduce(TypeList{}, TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + return { (ima[bno](i,j,k) == 0) ? 
ma[bno](i,j,k)*f_vol(i) : 0._rt }; + }); } else { const MultiFab& mf = pf.get(ilev, var_name); - for (MFIter mfi(mf); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); - if (bx.ok()) { - const auto& fab = mf.array(mfi); - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - Array p - = {AMREX_D_DECL(problo[0]+static_cast(i+0.5)*dx[0], - problo[1]+static_cast(j+0.5)*dx[1], - problo[2]+static_cast(k+0.5)*dx[2])}; - - // compute the volume - Real vol = std::numeric_limits::quiet_NaN(); - if (coord == 0) { - // Cartesian - vol = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - vol *= dx[idim]; - } - } else if (coord == 1) { - // axisymmetric V = pi (r_r**2 - r_l**2) * dz - // = pi dr * dz * (r_r + r_l) - // = 2 pi r dr dz - vol = 2 * pi * p[0] * dx[0] * dx[1]; - } else if (coord == 2) { - // 1-d spherical V = 4/3 pi (r_r**3 - r_l**3) - Real r_r = problo[0]+static_cast(i+1)*dx[0]; - Real r_l = problo[0]+static_cast(i)*dx[0]; - vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l); - } - - lsum += fab(i,j,k) * vol; - } - } - } - } - } + auto const& ma = mf.const_arrays(); + lsum += ParReduce(TypeList{}, TypeList{}, mf, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + -> GpuTuple + { + return { ma[bno](i,j,k)*f_vol(i) }; + }); } } ParallelDescriptor::ReduceRealSum(lsum); - if (ParallelDescriptor::IOProcessor()) { std::cout << "integral of " << var_name << " = " << lsum << std::endl; From 15a0bb9a8c1b34136632b16c5511375e9d56b184 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 14 Nov 2023 19:36:40 -0800 Subject: [PATCH 037/186] SYCL: Use get_multi_ptr instead of get_pointer (#3630) The latter has been deprecated in SYCL 2020. 
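For reference, a standalone sketch of the new idiom (not part of this patch; the kernel body is made up for illustration): `local_accessor::get_multi_ptr().get()` replaces the deprecated `get_pointer()` wherever a raw pointer to shared local memory is needed, mirroring the calls now used in `AMReX_GpuLaunchFunctsG.H`.

```cpp
#include <sycl/sycl.hpp>

int main ()
{
    sycl::queue q;
    constexpr int n = 256;
    constexpr int wgsize = 64;
    int* out = sycl::malloc_shared<int>(n, q);

    q.submit([&] (sycl::handler& h)
    {
        sycl::local_accessor<int> lmem(sycl::range<1>(wgsize), h);
        h.parallel_for(sycl::nd_range<1>(sycl::range<1>(n), sycl::range<1>(wgsize)),
                       [=] (sycl::nd_item<1> item)
        {
            // SYCL 2020 spelling; previously lmem.get_pointer().
            int* p = lmem.get_multi_ptr().get();
            auto lid = item.get_local_id(0);
            p[lid] = int(item.get_global_id(0));
            sycl::group_barrier(item.get_group());
            out[item.get_global_id(0)] = p[lid];
        });
    }).wait();

    sycl::free(out, q);
    return 0;
}
```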
--- .github/workflows/intel.yml | 3 +-- Src/Base/AMReX_GpuLaunchFunctsG.H | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 6133e666fad..d86035d916e 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -226,10 +226,9 @@ jobs: -DCMAKE_VERBOSE_MAKEFILE=ON \ -DAMReX_EB=ON \ -DAMReX_ENABLE_TESTS=ON \ - -DAMReX_FORTRAN=ON \ + -DAMReX_FORTRAN=OFF \ -DCMAKE_C_COMPILER=$(which icc) \ -DCMAKE_CXX_COMPILER=$(which icpc) \ - -DCMAKE_Fortran_COMPILER=$(which ifort) \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache cmake --build build --parallel 2 cmake --build build --target install diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H index aea0c030152..78e9e856535 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsG.H +++ b/Src/Base/AMReX_GpuLaunchFunctsG.H @@ -36,7 +36,7 @@ void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes, [=] (sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] { - f(Gpu::Handler{&item,shared_data.get_pointer()}); + f(Gpu::Handler{&item,shared_data.get_multi_ptr().get()}); }); }); } catch (sycl::exception const& ex) { @@ -82,7 +82,7 @@ void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, [[sycl::reqd_work_group_size(1,1,MT)]] [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] { - f(Gpu::Handler{&item,shared_data.get_pointer()}); + f(Gpu::Handler{&item,shared_data.get_multi_ptr().get()}); }); }); } catch (sycl::exception const& ex) { @@ -210,7 +210,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept i < n; i += stride) { int n_active_threads = amrex::min(n-i+(T)item.get_local_id(0), (T)item.get_local_range(0)); - detail::call_f(f, i, Gpu::Handler{&item, shared_data.get_pointer(), + detail::call_f(f, i, Gpu::Handler{&item, shared_data.get_multi_ptr().get(), n_active_threads}); } }); @@ -269,7 +269,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept k += lo.z; int n_active_threads = amrex::min(ncells-icell+(int)item.get_local_id(0), (int)item.get_local_range(0)); - detail::call_f(f, i, j, k, Gpu::Handler{&item, shared_data.get_pointer(), + detail::call_f(f, i, j, k, Gpu::Handler{&item, shared_data.get_multi_ptr().get(), n_active_threads}); } }); @@ -335,7 +335,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) n int n_active_threads = amrex::min(ncells-icell+(int)item.get_local_id(0), (int)item.get_local_range(0)); detail::call_f(f, i, j, k, ncomp, - Gpu::Handler{&item, shared_data.get_pointer(), + Gpu::Handler{&item, shared_data.get_multi_ptr().get(), n_active_threads}); } }); From a9da2a54de5d704dda746c8b036de469ea19f4e9 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 15 Nov 2023 12:56:44 -0800 Subject: [PATCH 038/186] SUNDIALS: Use sunrealtype instead of realtype (#3632) The latter has been deprecated. 
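For downstream codes, the visible change is only the spelling of the SUNDIALS floating-point type. A hypothetical user-side RHS callback (not from this patch) written against the non-deprecated name would look like:

```cpp
#include <sundials/sundials_types.h>
#include <sundials/sundials_nvector.h>

// dy/dt = -t*y, purely for illustration; the signature matches the callbacks
// wrapped below, using sunrealtype instead of the deprecated realtype.
static int my_rhs (sunrealtype t, N_Vector y, N_Vector ydot, void* /*user_data*/)
{
    N_VScale(-t, y, ydot);
    return 0;
}
```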
--- Src/Extern/SUNDIALS/AMReX_Sundials.H | 4 +- .../SUNDIALS/AMReX_SundialsIntegrator.H | 44 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Src/Extern/SUNDIALS/AMReX_Sundials.H b/Src/Extern/SUNDIALS/AMReX_Sundials.H index 7bd09795d50..141291e4a21 100644 --- a/Src/Extern/SUNDIALS/AMReX_Sundials.H +++ b/Src/Extern/SUNDIALS/AMReX_Sundials.H @@ -6,7 +6,7 @@ #include #include -static_assert(std::is_same::value, - "amrex::Real must be the same as SUNDIALS realtype"); +static_assert(std::is_same::value, + "amrex::Real must be the same as SUNDIALS sunrealtype"); #endif diff --git a/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H b/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H index 2623f832590..8066e9e8840 100644 --- a/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H +++ b/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H @@ -15,48 +15,48 @@ #include /* access to SPGMR SUNLinearSolver */ #include /* access to SPGMR SUNLinearSolver */ #include /* access to FixedPoint SUNNonlinearSolver */ -#include /* defs. of realtype, sunindextype, etc */ +#include /* defs. of sunrealtype, sunindextype, etc */ namespace amrex { struct SundialsUserData { - std::function f0; - std::function f_fast; - std::function f; - /* std::function StoreStage; */ - std::function ProcessStage; - std::function PostStoreStage; + std::function f0; + std::function f_fast; + std::function f; + /* std::function StoreStage; */ + std::function ProcessStage; + std::function PostStoreStage; }; namespace SundialsUserFun { - static int f0 (realtype t, N_Vector y, N_Vector ydot, void *user_data) { + static int f0 (sunrealtype t, N_Vector y, N_Vector ydot, void *user_data) { SundialsUserData* udata = static_cast(user_data); return udata->f0(t, y, ydot, user_data); } - static int f_fast (realtype t, N_Vector y_data, N_Vector y_rhs, void *user_data) { + static int f_fast (sunrealtype t, N_Vector y_data, N_Vector y_rhs, void *user_data) { SundialsUserData* udata = static_cast(user_data); return udata->f_fast(t, y_data, y_rhs, user_data); } - static int f (realtype t, N_Vector y_data, N_Vector y_rhs, void *user_data) { + static int f (sunrealtype t, N_Vector y_data, N_Vector y_rhs, void *user_data) { SundialsUserData* udata = static_cast(user_data); return udata->f(t, y_data, y_rhs, user_data); } /* - static int StoreStage (realtype t, N_Vector* f_data, int nvecs, void *user_data) { + static int StoreStage (sunrealtype t, N_Vector* f_data, int nvecs, void *user_data) { SundialsUserData* udata = static_cast(user_data); return udata->StoreStage(t, f_data, nvecs, user_data); } */ - static int ProcessStage (realtype t, N_Vector y_data, void *user_data) { + static int ProcessStage (sunrealtype t, N_Vector y_data, void *user_data) { SundialsUserData* udata = static_cast(user_data); return udata->ProcessStage(t, y_data, user_data); } - static int PostStoreStage(realtype t, N_Vector y_data, void *user_data) { + static int PostStoreStage(sunrealtype t, N_Vector y_data, void *user_data) { SundialsUserData* udata = static_cast(user_data); return udata->PostStoreStage(t, y_data, user_data); } @@ -245,7 +245,7 @@ public: /* Begin Section: SUNDIALS FUNCTION HOOKS */ /* f routine to compute the ODE RHS function f(t,y). 
*/ - udata.f = [&](realtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { + udata.f = [&](sunrealtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { amrex::Vector S_data; amrex::Vector S_rhs; @@ -265,7 +265,7 @@ public: return 0; }; - udata.ProcessStage = [&](realtype rhs_time, N_Vector y_data, void * /* user_data */) -> int { + udata.ProcessStage = [&](sunrealtype rhs_time, N_Vector y_data, void * /* user_data */) -> int { amrex::Vector S_data; const int num_vecs = N_VGetNumSubvectors_ManyVector(y_data); @@ -421,14 +421,14 @@ public: /* Begin Section: SUNDIALS FUNCTION HOOKS */ /* f0 routine to compute a zero-valued ODE RHS function f(t,y). */ - udata.f0 = [&](realtype /* rhs_time */, N_Vector /* y */, N_Vector ydot, void * /* user_data */) -> int { + udata.f0 = [&](sunrealtype /* rhs_time */, N_Vector /* y */, N_Vector ydot, void * /* user_data */) -> int { // Initialize ydot to zero and return N_VConst(0.0, ydot); return 0; }; /* f routine to compute the ODE RHS function f(t,y). */ - udata.f_fast = [&](realtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { + udata.f_fast = [&](sunrealtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { amrex::Vector S_data; amrex::Vector S_rhs; amrex::Vector S_stage_data; @@ -456,7 +456,7 @@ public: }; /* f routine to compute the ODE RHS function f(t,y). */ - udata.f = [&](realtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { + udata.f = [&](sunrealtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { amrex::Vector S_data; amrex::Vector S_rhs; @@ -476,7 +476,7 @@ public: return 0; }; - udata.ProcessStage = [&](realtype rhs_time, N_Vector y_data, void * /* user_data */) -> int { + udata.ProcessStage = [&](sunrealtype rhs_time, N_Vector y_data, void * /* user_data */) -> int { amrex::Vector S_data; const int num_vecs = N_VGetNumSubvectors_ManyVector(y_data); @@ -492,7 +492,7 @@ public: return 0; }; - udata.PostStoreStage = [&](realtype rhs_time, N_Vector y_data, void *user_data) -> int { + udata.PostStoreStage = [&](sunrealtype rhs_time, N_Vector y_data, void *user_data) -> int { udata.ProcessStage(rhs_time, y_data, user_data); for(int i=0; i int { + udata.f = [&](sunrealtype rhs_time, N_Vector y_data, N_Vector y_rhs, void * /* user_data */) -> int { amrex::Vector S_data; amrex::Vector S_rhs; @@ -713,7 +713,7 @@ public: return 0; }; - udata.ProcessStage = [&](realtype rhs_time, N_Vector y_data, void * /* user_data */) -> int { + udata.ProcessStage = [&](sunrealtype rhs_time, N_Vector y_data, void * /* user_data */) -> int { amrex::Vector S_data; const int num_vecs = N_VGetNumSubvectors_ManyVector(y_data); From d171635b3a54fb9e8896c9562bbedaa5f3a894cd Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Thu, 16 Nov 2023 08:49:21 -0800 Subject: [PATCH 039/186] Fix typo in `MLMGT::getGradSolution` when `MF` is different from `AMF` (#3631) ## Summary This PR fixes a small typo in `MLMGT::getGradSolution` that causes an error when `AMR` is distinct from `MF`. A test demonstrating the fix is added for the case where `MF`=`MultiFab` and `AMF`=`fMultiFab`. ## Additional background This typo was noticed when working on generalizing the `MLMG` template to the WIP class `FabArraySet`. 
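As a rough usage sketch of the mixed-type path the new test covers (setup elided, names hypothetical): the solve runs with `MF = MultiFab`, while the gradient is requested in a different FabArray type, `AMF = fMultiFab`.

```cpp
// Hypothetical fragment, not a complete program: linop, phi, rhs, and the
// face-centered fgrad arrays are assumed to be defined as in the test below.
MLMG mlmg(linop);
mlmg.solve(amrex::GetVecOfPtrs(phi), amrex::GetVecOfConstPtrs(rhs), tol_rel, tol_abs);

// Request grad(phi) in single precision; this is the MF != AMF code path
// that previously tripped over the missing [idim] index.
mlmg.getGradSolution(amrex::GetVecOfArrOfPtrs(fgrad));
```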
## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX --- Src/LinearSolvers/MLMG/AMReX_MLMG.H | 2 +- Tests/LinearSolvers/EBflux_grad/MyTest.H | 1 + Tests/LinearSolvers/EBflux_grad/MyTest.cpp | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 7fc822afa23..f1fed2d3db3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -545,7 +545,7 @@ MLMGT::getGradSolution (const Vector >& a_grad_so } linop.compGrad(alev, GetArrOfPtrs(grad_sol), sol[alev], a_loc); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - a_grad_sol[alev][idim]->LocalCopy(grad_sol, 0, 0, ncomp, IntVect(0)); + a_grad_sol[alev][idim]->LocalCopy(grad_sol[idim], 0, 0, ncomp, IntVect(0)); } } } diff --git a/Tests/LinearSolvers/EBflux_grad/MyTest.H b/Tests/LinearSolvers/EBflux_grad/MyTest.H index 17ccdec1604..f26ee4e1c0b 100644 --- a/Tests/LinearSolvers/EBflux_grad/MyTest.H +++ b/Tests/LinearSolvers/EBflux_grad/MyTest.H @@ -45,6 +45,7 @@ private: amrex::Vector phi; amrex::Vector> flux; amrex::Vector> grad; + amrex::Vector> fgrad; // Used to test getGradSolution when MF is different from AMF amrex::Vector rhs; amrex::Vector acoef; amrex::Vector > bcoef; diff --git a/Tests/LinearSolvers/EBflux_grad/MyTest.cpp b/Tests/LinearSolvers/EBflux_grad/MyTest.cpp index 782f3eece39..c814536203d 100644 --- a/Tests/LinearSolvers/EBflux_grad/MyTest.cpp +++ b/Tests/LinearSolvers/EBflux_grad/MyTest.cpp @@ -73,6 +73,7 @@ MyTest::solve () mlmg.solve(amrex::GetVecOfPtrs(phi), amrex::GetVecOfConstPtrs(rhs), tol_rel, tol_abs); mlmg.getFluxes(amrex::GetVecOfArrOfPtrs(flux)); mlmg.getGradSolution(amrex::GetVecOfArrOfPtrs(grad)); + mlmg.getGradSolution(amrex::GetVecOfArrOfPtrs(fgrad)); // Test when MF is different from AMF for (int ilev = 0; ilev <= max_level; ++ilev) { amrex::VisMF::Write(phi[0], "phi-"+std::to_string(ilev)); } @@ -146,6 +147,7 @@ MyTest::initData () bcoef.resize(nlevels); flux.resize(1); grad.resize(1); + fgrad.resize(1); for (int ilev = 0; ilev < nlevels; ++ilev) { dmap[ilev].define(grids[ilev]); @@ -193,5 +195,7 @@ MyTest::initData () dmap[0], 1, 0, MFInfo(), *factory[0]); grad[0][idim].define(amrex::convert(grids[0],IntVect::TheDimensionVector(idim)), dmap[0], 1, 0, MFInfo(), *factory[0]); + fgrad[0][idim].define(amrex::convert(grids[0],IntVect::TheDimensionVector(idim)), + dmap[0], 1, 0, MFInfo(), DefaultFabFactory>()); } } From d93f34416da3dfb5b50e5a8b0cbb3b6f78bd0569 Mon Sep 17 00:00:00 2001 From: Ankith Anil Das <46082093+ankithadas@users.noreply.github.com> Date: Fri, 17 Nov 2023 04:57:41 +1100 Subject: [PATCH 040/186] Bug fix for amrex::Subtract when called with interger nghost (#3634) ## Summary `amrex::Subtract` leads to an infinite recursion when invoked with an integer value for nghost. To address this issue, explicit casting of the integer to `IntVect` has been incorporated. 
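The failure mode, reduced to a self-contained sketch (stand-in types below, not the actual AMReX signatures): without an explicit conversion, the `int` overload is the only viable match for its own forwarded call, so control never reaches the `IntVect` overload.

```cpp
#include <array>

using FakeIntVect = std::array<int, 3>;   // stand-in for amrex::IntVect

// "IntVect" flavor: does the real work.
void Subtract (double& dst, double src, FakeIntVect /*nghost*/) { dst -= src; }

// Convenience int-nghost flavor. Forwarding the bare int,
//     Subtract(dst, src, nghost);
// binds right back to this overload (an int cannot implicitly become a
// FakeIntVect here), i.e. infinite recursion. Converting explicitly selects
// the intended overload:
void Subtract (double& dst, double src, int nghost)
{
    Subtract(dst, src, FakeIntVect{nghost, nghost, nghost});
}

int main () { double d = 3.0; Subtract(d, 1.0, 2); return 0; }
```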
## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_FabArrayUtility.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H index 78f3355d34a..ca80a070f45 100644 --- a/Src/Base/AMReX_FabArrayUtility.H +++ b/Src/Base/AMReX_FabArrayUtility.H @@ -1103,7 +1103,7 @@ template & dst, FabArray const& src, int srccomp, int dstcomp, int numcomp, int nghost) { - Subtract(dst,src,srccomp,dstcomp,numcomp,nghost); + Subtract(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost)); } template Date: Fri, 17 Nov 2023 10:22:21 -0800 Subject: [PATCH 041/186] solve_bicgstab: cut use of `s` (#3629) ## Summary The MF named `s` seems to be an unnecessary usage of memory as the residue `r` can fulfill its roles in the algorithm. This PR replaces `s` with `r` and `LinComb` with `Saxpy` as appropriate. ## Additional background This PR is part of a larger request to improve `solve_bicgstab` and `solve_cg`. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index c99d7b319bd..da50aae7b89 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -98,7 +98,6 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) MF sorig = Lp.make(amrlev, mglev, nghost); MF p = Lp.make(amrlev, mglev, nghost); MF r = Lp.make(amrlev, mglev, nghost); - MF s = Lp.make(amrlev, mglev, nghost); MF rh = Lp.make(amrlev, mglev, nghost); MF v = Lp.make(amrlev, mglev, nghost); MF t = Lp.make(amrlev, mglev, nghost); @@ -166,9 +165,9 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) ret = 2; break; } MF::Saxpy(sol, alpha, ph, 0, 0, ncomp, nghost); // sol += alpha * ph - MF::LinComb(s, RT(1.0), r, 0, -alpha, v, 0, 0, ncomp, nghost); // s = r - alpha * v + MF::Saxpy(r, -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v - rnorm = norm_inf(s); + rnorm = norm_inf(r); if ( verbose > 2 && ParallelDescriptor::IOProcessor() ) { @@ -180,7 +179,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) { break; } - sh.LocalCopy(s,0,0,ncomp,nghost); + sh.LocalCopy(r,0,0,ncomp,nghost); Lp.apply(amrlev, mglev, t, sh, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); Lp.normalize(amrlev, mglev, t); // @@ -188,7 +187,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) // in the following two dotxy()s. We do that by calculating the "local" // values and then reducing the two local values at the same time. 
// - RT tvals[2] = { dotxy(t,t,true), dotxy(t,s,true) }; + RT tvals[2] = { dotxy(t,t,true), dotxy(t,r,true) }; BL_PROFILE_VAR("MLCGSolver::ParallelAllReduce", blp_par); ParallelAllReduce::Sum(tvals,2,Lp.BottomCommunicator()); @@ -203,7 +202,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) ret = 3; break; } MF::Saxpy(sol, omega, sh, 0, 0, ncomp, nghost); // sol += omega * sh - MF::LinComb(r, RT(1.0), s, 0, -omega, t, 0, 0, ncomp, nghost); // r = s - omega * t + MF::Saxpy(r, -omega, t, 0, 0, ncomp, nghost); // r += -omega * t rnorm = norm_inf(r); From d75c04b422dae567c9d3c74482b5be84d002ee57 Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Mon, 20 Nov 2023 13:57:50 -0800 Subject: [PATCH 042/186] solve_bicgstab: use fewer MFs (#3635) ## Summary This PR cuts the number of MFs used in `solve_bicgstab`, saving on memory and LocalCopy operations. In particular, the MFs `ph` and `sh` are removed. ## Additional background This is a follow up to avoid-use-of-s and other PRs to improve `solve_bicgstab`. My own testing has shown that this PR gives the same results as before, but regression testing should be done to verify this in all cases. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 25 ++++++++++------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index da50aae7b89..4768f06fc41 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -90,14 +90,12 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) const int ncomp = sol.nComp(); - MF ph = Lp.make(amrlev, mglev, sol.nGrowVect()); - MF sh = Lp.make(amrlev, mglev, sol.nGrowVect()); - ph.setVal(RT(0.0)); - sh.setVal(RT(0.0)); + MF p = Lp.make(amrlev, mglev, sol.nGrowVect()); + MF r = Lp.make(amrlev, mglev, sol.nGrowVect()); + p.setVal(RT(0.0)); // Make sure all entries are initialized to avoid errors + r.setVal(RT(0.0)); MF sorig = Lp.make(amrlev, mglev, nghost); - MF p = Lp.make(amrlev, mglev, nghost); - MF r = Lp.make(amrlev, mglev, nghost); MF rh = Lp.make(amrlev, mglev, nghost); MF v = Lp.make(amrlev, mglev, nghost); MF t = Lp.make(amrlev, mglev, nghost); @@ -151,8 +149,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) MF::Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p } - ph.LocalCopy(p,0,0,ncomp,nghost); - Lp.apply(amrlev, mglev, v, ph, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); + Lp.apply(amrlev, mglev, v, p, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); Lp.normalize(amrlev, mglev, v); RT rhTv = dotxy(rh,v); @@ -164,9 +161,10 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { ret = 2; break; } - MF::Saxpy(sol, alpha, ph, 0, 0, ncomp, nghost); // sol += alpha * ph - MF::Saxpy(r, -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v + MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p + MF::Saxpy(r, -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v + rnorm = norm_inf(r); rnorm = norm_inf(r); if ( verbose > 2 && ParallelDescriptor::IOProcessor() ) @@ -179,8 +177,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) { break; } - sh.LocalCopy(r,0,0,ncomp,nghost); - Lp.apply(amrlev, mglev, t, sh, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); + 
Lp.apply(amrlev, mglev, t, r, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); Lp.normalize(amrlev, mglev, t); // // This is a little funky. I want to elide one of the reductions @@ -201,8 +198,8 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { ret = 3; break; } - MF::Saxpy(sol, omega, sh, 0, 0, ncomp, nghost); // sol += omega * sh - MF::Saxpy(r, -omega, t, 0, 0, ncomp, nghost); // r += -omega * t + MF::Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r + MF::Saxpy(r, -omega, t, 0, 0, ncomp, nghost); // r += -omega * t rnorm = norm_inf(r); From c51e0a3071f3f1d2a11b6c339bff58d4ba5e30e8 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 20 Nov 2023 13:59:06 -0800 Subject: [PATCH 043/186] CI: Ascent + Conduit (#3639) ## Summary For some reason, `AMReX_ASCENT` does not trigger `AMReX_CONDUIT` to be set, too. cc @cyrush ## Additional background X-ref: #3350 --- .github/workflows/ascent.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ascent.yml b/.github/workflows/ascent.yml index 83d2f7ebac3..d8217621a1b 100644 --- a/.github/workflows/ascent.yml +++ b/.github/workflows/ascent.yml @@ -26,7 +26,8 @@ jobs: -DCMAKE_BUILD_TYPE=Debug \ -DAMReX_ENABLE_TESTS=ON \ -DAMReX_FORTRAN=OFF \ - -DAMReX_ASCENT=ON + -DAMReX_ASCENT=ON \ + -DAMReX_CONDUIT=ON - name: Build run: | . /ascent_docker_setup_env.sh From 9e35dc19489dc5d312e92781cb0471d282cf8370 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 20 Nov 2023 21:04:03 -0800 Subject: [PATCH 044/186] Ascent: SoA Particle Support (#3350) ## Summary Add support for pure SoA layouted particle containers for Ascent. ## Additional background Follow-up to #2878. ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Andrew Myers --- Src/Extern/Conduit/AMReX_Conduit_Blueprint.H | 5 +- .../AMReX_Conduit_Blueprint_ParticlesI.H | 207 ++++++++---- .../Ascent_Insitu_SOA/CMakeLists.txt | 13 + Tests/Particles/Ascent_Insitu_SOA/GNUmakefile | 24 ++ Tests/Particles/Ascent_Insitu_SOA/inputs.rt | 10 + Tests/Particles/Ascent_Insitu_SOA/main.cpp | 306 ++++++++++++++++++ 6 files changed, 491 insertions(+), 74 deletions(-) create mode 100644 Tests/Particles/Ascent_Insitu_SOA/CMakeLists.txt create mode 100644 Tests/Particles/Ascent_Insitu_SOA/GNUmakefile create mode 100644 Tests/Particles/Ascent_Insitu_SOA/inputs.rt create mode 100644 Tests/Particles/Ascent_Insitu_SOA/main.cpp diff --git a/Src/Extern/Conduit/AMReX_Conduit_Blueprint.H b/Src/Extern/Conduit/AMReX_Conduit_Blueprint.H index 6d23bcf07ed..9ac8eb31fd0 100644 --- a/Src/Extern/Conduit/AMReX_Conduit_Blueprint.H +++ b/Src/Extern/Conduit/AMReX_Conduit_Blueprint.H @@ -96,9 +96,8 @@ namespace amrex // coordset and fields used to represent the passed particle container. // This allows you to use unique names to wrap multiple particle containers // into a single blueprint tree. 
- template - void ParticleContainerToBlueprint (const ParticleContainer + void ParticleContainerToBlueprint (const ParticleContainer_impl &pc, const Vector &real_comp_names, diff --git a/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H b/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H index f2d7d1ed2da..e4186ba247b 100644 --- a/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H +++ b/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H @@ -20,10 +20,9 @@ namespace amrex // Note: // This is a helper function, it's not part of the AMReX Blueprint Interface. //---------------------------------------------------------------------------// -template +template void -ParticleTileToBlueprint(const ParticleTile, +ParticleTileToBlueprint(const ParticleTile &ptile, const Vector &real_comp_names, @@ -31,15 +30,11 @@ ParticleTileToBlueprint(const ParticleTile); + int num_particles = ptile.size(); // knowing the above, we can zero copy the x,y,z positions + id, cpu // and any user fields in the AOS - // get the first particle's struct - const auto &pstruct = ptile.GetArrayOfStructs(); - // setup a blueprint description for the particle mesh // create a coordinate set std::string coordset_name = topology_name + "_coords"; @@ -63,29 +58,56 @@ ParticleTileToBlueprint(const ParticleTile(pstruct.data()); - char* pbuf = const_cast(pbuf_const); + if constexpr(ParticleType::is_soa_particle) + { + amrex::ignore_unused(pbuf); - ParticleReal* xp = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); - n_coords["values/x"].set_external(xp, - num_particles, - 0, - struct_size); + const auto &soa = ptile.GetStructOfArrays(); + + // for soa entries, we can use standard strides, + // since these are contiguous arrays + + n_coords["values/x"].set_external(const_cast(&soa.GetRealData(0)[0]), + num_particles); #if AMREX_SPACEDIM > 1 - ParticleReal* yp = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); - n_coords["values/y"].set_external(yp, - num_particles, - 0, - struct_size); + n_coords["values/y"].set_external(const_cast(&soa.GetRealData(1)[0]), + num_particles); #endif #if AMREX_SPACEDIM > 2 - ParticleReal* zp = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); - n_coords["values/z"].set_external(zp, - num_particles, - 0, - struct_size); + n_coords["values/z"].set_external(const_cast(&soa.GetRealData(2)[0]), + num_particles); +#endif + } else + { + // get the first particle's struct + const auto &pstruct = ptile.GetArrayOfStructs(); + const int struct_size = sizeof(ParticleType); + + const char* pbuf_const = reinterpret_cast(pstruct.data()); + pbuf = const_cast(pbuf_const); + + ParticleReal* xp = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); + n_coords["values/x"].set_external(xp, + num_particles, + 0, + struct_size); +#if AMREX_SPACEDIM > 1 + ParticleReal* yp = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); + n_coords["values/y"].set_external(yp, + num_particles, + 0, + struct_size); +#endif +#if AMREX_SPACEDIM > 2 + ParticleReal* zp = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); + n_coords["values/z"].set_external(zp, + num_particles, + 0, + struct_size); #endif + } // fields conduit::Node &n_fields = res["fields"]; @@ -95,20 +117,26 @@ ParticleTileToBlueprint(const ParticleTile(pbuf); pbuf += sizeof(ParticleReal); - conduit::Node &n_f = n_fields[real_comp_names.at(vname_real_idx)]; - n_f["topology"] = topology_name; - n_f["association"] = "element"; - n_f["values"].set_external(val, - num_particles, - 0, - struct_size); + constexpr int struct_size = 
sizeof(ParticleType); + constexpr int NStructReal = ParticleType::NReal; - vname_real_idx++; + // struct real fields, the first set are always the particle positions + // which we wrap above + for (int i = 0; i < NStructReal; i++) + { + ParticleReal* val = reinterpret_cast(pbuf); pbuf += sizeof(ParticleReal); + conduit::Node &n_f = n_fields[real_comp_names.at(vname_real_idx)]; + n_f["topology"] = topology_name; + n_f["association"] = "element"; + n_f["values"].set_external(val, + num_particles, + 0, + struct_size); + + vname_real_idx++; + } } //----------------------------------// @@ -116,44 +144,77 @@ ParticleTileToBlueprint(const ParticleTile(pbuf); pbuf += sizeof(int); - conduit::Node &n_f_id = n_fields[topology_name + "_id"]; + if constexpr(!ParticleType::is_soa_particle) + { + const int struct_size = sizeof(ParticleType); + + // id is the first int entry + int* id = reinterpret_cast(pbuf); pbuf += sizeof(int); + conduit::Node &n_f_id = n_fields[topology_name + "_id"]; + + n_f_id["topology"] = topology_name; + n_f_id["association"] = "element"; + n_f_id["values"].set_external(id, + num_particles, + 0, + struct_size); + + // cpu is the second int entry + int* cpu = reinterpret_cast(pbuf); pbuf += sizeof(int); + conduit::Node &n_f_cpu = n_fields[topology_name + "_cpu"]; + + n_f_cpu["topology"] = topology_name; + n_f_cpu["association"] = "element"; + n_f_cpu["values"].set_external(cpu, + num_particles, + 0, + struct_size); + } else { + const auto &soa = ptile.GetStructOfArrays(); + + // for soa entries, we can use standard strides, + // since these are contiguous arrays - n_f_id["topology"] = topology_name; - n_f_id["association"] = "element"; - n_f_id["values"].set_external(id, - num_particles, - 0, - struct_size); + // id is the first int entry + conduit::Node &n_f_id = n_fields[topology_name + "_id"]; - // cpu is the second int entry - int* cpu = reinterpret_cast(pbuf); pbuf += sizeof(int); - conduit::Node &n_f_cpu = n_fields[topology_name + "_cpu"]; + n_f_id["topology"] = topology_name; + n_f_id["association"] = "element"; + n_f_id["values"].set_external(const_cast(&soa.GetIntData(0)[0]), + num_particles); - n_f_cpu["topology"] = topology_name; - n_f_cpu["association"] = "element"; - n_f_cpu["values"].set_external(cpu, - num_particles, - 0, - struct_size); + // cpu is the second int entry + conduit::Node &n_f_cpu = n_fields[topology_name + "_cpu"]; + + n_f_cpu["topology"] = topology_name; + n_f_cpu["association"] = "element"; + n_f_cpu["values"].set_external(const_cast(&soa.GetIntData(0)[0]), + num_particles); + + } // -------------------------------- // user defined, integer aos fields // -------------------------------- int vname_int_idx = 0; - for (int i = 0; i < NStructInt; i++) + if constexpr(!ParticleType::is_soa_particle) { - int* val = reinterpret_cast(pbuf); pbuf += sizeof(int); - conduit::Node &n_f = n_fields[int_comp_names.at(vname_int_idx)]; - n_f["topology"] = topology_name; - n_f["association"] = "element"; - n_f["values"].set_external(val, - num_particles, - 0, - struct_size); - vname_int_idx++; + constexpr int struct_size = sizeof(ParticleType); + constexpr int NStructInt = ParticleType::NInt; + + for (int i = 0; i < NStructInt; i++) + { + int* val = reinterpret_cast(pbuf); pbuf += sizeof(int); + conduit::Node &n_f = n_fields[int_comp_names.at(vname_int_idx)]; + n_f["topology"] = topology_name; + n_f["association"] = "element"; + n_f["values"].set_external(val, + num_particles, + 0, + struct_size); + vname_int_idx++; + } } // ------------------------- @@ -193,10 +254,9 
@@ ParticleTileToBlueprint(const ParticleTile +template void -ParticleContainerToBlueprint(const ParticleContainer &pc, const Vector &real_comp_names, @@ -209,8 +269,13 @@ ParticleContainerToBlueprint(const ParticleContainer; + using MyParConstIter = ParConstIter_impl; // // blueprint expects unique ids for each domain published diff --git a/Tests/Particles/Ascent_Insitu_SOA/CMakeLists.txt b/Tests/Particles/Ascent_Insitu_SOA/CMakeLists.txt new file mode 100644 index 00000000000..82216a02af8 --- /dev/null +++ b/Tests/Particles/Ascent_Insitu_SOA/CMakeLists.txt @@ -0,0 +1,13 @@ +if ( NOT AMReX_ASCENT ) + return () +endif () + +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp) + set(_input_files inputs.rt ) + + setup_test(${D} _sources _input_files NTASKS 2) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/Particles/Ascent_Insitu_SOA/GNUmakefile b/Tests/Particles/Ascent_Insitu_SOA/GNUmakefile new file mode 100644 index 00000000000..660e4a13f22 --- /dev/null +++ b/Tests/Particles/Ascent_Insitu_SOA/GNUmakefile @@ -0,0 +1,24 @@ +AMREX_HOME = ../../../ + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = TRUE +USE_PARTICLES = TRUE +USE_ASCENT = TRUE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/Particle/Make.package +include $(AMREX_HOME)/Src/Extern/Conduit/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/Particles/Ascent_Insitu_SOA/inputs.rt b/Tests/Particles/Ascent_Insitu_SOA/inputs.rt new file mode 100644 index 00000000000..e34fda14923 --- /dev/null +++ b/Tests/Particles/Ascent_Insitu_SOA/inputs.rt @@ -0,0 +1,10 @@ +ascent.size = (32, 64, 64) +ascent.max_grid_size = 32 +ascent.is_periodic = 1 +ascent.num_ppc = 1 +ascent.nlevs = 1 + +ascent.num_runtime_real = 0 +ascent.num_runtime_int = 0 + +particles.do_tiling = 1 diff --git a/Tests/Particles/Ascent_Insitu_SOA/main.cpp b/Tests/Particles/Ascent_Insitu_SOA/main.cpp new file mode 100644 index 00000000000..46e2af98422 --- /dev/null +++ b/Tests/Particles/Ascent_Insitu_SOA/main.cpp @@ -0,0 +1,306 @@ +#include +#include +#include + +#if !defined(AMREX_PARTICLES) || !defined(AMREX_USE_CONDUIT) +#error Incompatible AMReX library configuration! 
This tutorial requires AMREX_PARTICLES and AMREX_USE_CONDUIT +#endif + +#include + +#include + +#include + + +using namespace amrex; + +static constexpr int NR = 7; +static constexpr int NI = 4; + +int num_runtime_real = 0; +int num_runtime_int = 0; + +bool remove_negative = true; + +void get_position_unit_cell (Real* r, const IntVect& nppc, int i_part) +{ + int nx = nppc[0]; +#if AMREX_SPACEDIM > 1 + int ny = nppc[1]; +#else + int ny = 1; +#endif +#if AMREX_SPACEDIM > 2 + int nz = nppc[2]; +#else + int nz = 1; +#endif + + int ix_part = i_part/(ny * nz); + int iy_part = (i_part % (ny * nz)) % ny; + int iz_part = (i_part % (ny * nz)) / ny; + + r[0] = (0.5+ix_part)/nx; + r[1] = (0.5+iy_part)/ny; + r[2] = (0.5+iz_part)/nz; +} + +class TestParticleContainer + : public amrex::ParticleContainerPureSoA +{ + +public: + + TestParticleContainer (const Vector & a_geom, + const Vector & a_dmap, + const Vector & a_ba, + const Vector & a_rr) + : amrex::ParticleContainerPureSoA(a_geom, a_dmap, a_ba, a_rr) + { + for (int i = 0; i < num_runtime_real; ++i) + { + AddRealComp(true); + } + for (int i = 0; i < num_runtime_int; ++i) + { + AddIntComp(true); + } + } + + void InitParticles (const amrex::IntVect& a_num_particles_per_cell) + { + BL_PROFILE("InitParticles"); + + const int lev = 0; // only add particles on level 0 + const Real* dx = Geom(lev).CellSize(); + const Real* plo = Geom(lev).ProbLo(); + + const int num_ppc = AMREX_D_TERM( a_num_particles_per_cell[0], + *a_num_particles_per_cell[1], + *a_num_particles_per_cell[2]); + + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + const Box& tile_box = mfi.tilebox(); + + std::array, NR> host_real; + std::array, NI> host_int; + + std::vector > host_runtime_real(NumRuntimeRealComps()); + std::vector > host_runtime_int(NumRuntimeIntComps()); + + for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv)) + { + for (int i_part=0; i_part(id)); + host_int[1].push_back(ParallelDescriptor::MyProc()); + host_real[0].push_back(static_cast (plo[0] + (iv[0] + r[0])*dx[0])); +#if AMREX_SPACEDIM > 1 + host_real[1].push_back(static_cast (plo[1] + (iv[1] + r[1])*dx[1])); +#endif +#if AMREX_SPACEDIM > 2 + host_real[2].push_back(static_cast (plo[2] + (iv[2] + r[2])*dx[2])); +#endif + + for (int i = AMREX_SPACEDIM; i < NR; ++i) + host_real[i].push_back(static_cast(id)); + for (int i = 2; i < NI; ++i) + host_int[i].push_back(static_cast(id)); + for (int i = 0; i < NumRuntimeRealComps(); ++i) + host_runtime_real[i].push_back(static_cast(id)); + for (int i = 0; i < NumRuntimeIntComps(); ++i) + host_runtime_int[i].push_back(static_cast(id)); + } + } + + auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex()); + auto old_size = particle_tile.size(); + auto new_size = old_size + host_real[0].size(); + particle_tile.resize(new_size); + + auto& soa = particle_tile.GetStructOfArrays(); + for (int i = 0; i < NR; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_real[i].begin(), + host_real[i].end(), + soa.GetRealData(i).begin() + old_size); + } + + for (int i = 0; i < NI; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_int[i].begin(), + host_int[i].end(), + soa.GetIntData(i).begin() + old_size); + } + for (int i = 0; i < NumRuntimeRealComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_real[i].begin(), + host_runtime_real[i].end(), + soa.GetRealData(NR+i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeIntComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + 
host_runtime_int[i].begin(), + host_runtime_int[i].end(), + soa.GetIntData(NI+i).begin() + old_size); + } + + Gpu::streamSynchronize(); + } + + Redistribute(); + } +}; + +struct TestParams +{ + IntVect size; + int max_grid_size; + int num_ppc; + int is_periodic; + int nlevs; +}; + +void testAscent (); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + amrex::Print() << "Running redistribute test \n"; + testAscent(); + + amrex::Finalize(); +} + +void get_test_params (TestParams& params, const std::string& prefix) +{ + ParmParse pp(prefix); + pp.get("size", params.size); + pp.get("max_grid_size", params.max_grid_size); + pp.get("num_ppc", params.num_ppc); + pp.get("is_periodic", params.is_periodic); + pp.get("nlevs", params.nlevs); + pp.query("num_runtime_real", num_runtime_real); + pp.query("num_runtime_int", num_runtime_int); +} + +void testAscent () +{ + BL_PROFILE("testAscent"); + TestParams params; + get_test_params(params, "ascent"); + + int is_per[BL_SPACEDIM]; + for (int & d : is_per) + d = params.is_periodic; + + Vector rr(params.nlevs-1); + for (int lev = 1; lev < params.nlevs; lev++) + rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2)); + + RealBox real_box; + for (int n = 0; n < BL_SPACEDIM; n++) + { + real_box.setLo(n, 0.0); + real_box.setHi(n, params.size[n]); + } + + IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1)); + const Box base_domain(domain_lo, domain_hi); + + Vector geom(params.nlevs); + geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per); + for (int lev = 1; lev < params.nlevs; lev++) { + geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]), + &real_box, CoordSys::cartesian, is_per); + } + + Vector ba(params.nlevs); + Vector dm(params.nlevs); + auto lo = IntVect(AMREX_D_DECL(0, 0, 0)); + IntVect size = params.size; + for (int lev = 0; lev < params.nlevs; ++lev) + { + ba[lev].define(Box(lo, lo+params.size-1)); + ba[lev].maxSize(params.max_grid_size); + dm[lev].define(ba[lev]); + lo += size/2; + size *= 2; + } + + TestParticleContainer pc(geom, dm, ba, rr); + + int npc = params.num_ppc; + auto nppc = IntVect(AMREX_D_DECL(npc, npc, npc)); + + amrex::Print() << "About to initialize particles \n"; + + pc.InitParticles(nppc); + + { + conduit::Node bp_mesh; + /* TODO + amrex::MultiLevelToBlueprint( + nlev, + amrex::GetVecOfConstPtrs(mf), + varnames, + geom, + time, + iteration, + warpx.refRatio(), + bp_mesh + ); + */ + + // wrap pc for current species into a blueprint topology + std::string const prefix = "particle"; + Vector particle_varnames; + for (int i = 0; i < pc.NumRealComps(); ++i) { + particle_varnames.push_back(prefix + "_real_" + std::to_string(i)); + } + Vector particle_int_varnames; + for (int i = 0; i < pc.NumIntComps(); ++i) { + particle_int_varnames.push_back(prefix + "_int_" + std::to_string(i)); + } + + amrex::ParticleContainerToBlueprint(pc, + particle_varnames, + particle_int_varnames, + bp_mesh, + prefix); + // publish + ascent::Ascent ascent; + conduit::Node opts; + opts["exceptions"] = "catch"; + opts["mpi_comm"] = MPI_Comm_c2f(ParallelDescriptor::Communicator()); + ascent.open(opts); + ascent.publish(bp_mesh); + + // If you want to save blueprint HDF5 files w/o using an Ascent + // extract, you can call the following AMReX helper: + // const auto step = istep[0]; + // amrex::WriteBlueprintFiles(bp_mesh, "bp_export", step, "hdf5"); + + // render + conduit::Node actions; + ascent.execute(actions); + ascent.close(); + } + + // the way 
this test is set up, if we make it here we pass + amrex::Print() << "pass \n"; +} + From 60b45bc19b691c1187f4cfd9cd39b2744bf7e35a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 27 Nov 2023 13:57:39 -0800 Subject: [PATCH 045/186] MLEBABecLap: Support Robin BC at Domain Boundaries (#3617) --- .../MLMG/AMReX_MLABecLaplacian.H | 58 +++++++++++-------- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H | 4 +- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.H | 13 +++-- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 10 ++++ Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 32 +++++----- 5 files changed, 71 insertions(+), 46 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H index 9498af0e620..5c90a4e21ff 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H @@ -190,15 +190,15 @@ public: Array const& flux, FAB const& sol, int face_only, int ncomp); -protected: - - bool m_needs_update = true; - RT m_a_scalar = std::numeric_limits::quiet_NaN(); RT m_b_scalar = std::numeric_limits::quiet_NaN(); Vector > m_a_coeffs; Vector > > m_b_coeffs; +protected: + + bool m_needs_update = true; + Vector m_is_singular; [[nodiscard]] bool supportRobinBC () const noexcept override { return true; } @@ -474,29 +474,29 @@ MLABecLaplacianT::applyMetricTermsCoeffs () // \tilde{alpha}_i = alpha_i + (1-B) beta_{i+1/2} / h^2 // \tilde{rhs}_i = rhs_i + A beta_{i+1/2} / h^2 // -template -void -MLABecLaplacianT::applyRobinBCTermsCoeffs () +namespace detail { +template +void applyRobinBCTermsCoeffs (LP& linop) { - if (!(this->hasRobinBC())) { return; } + using RT = typename LP::RT; - const int ncomp = this->getNComp(); + const int ncomp = linop.getNComp(); bool reset_alpha = false; - if (m_a_scalar == RT(0.0)) { - m_a_scalar = RT(1.0); + if (linop.m_a_scalar == RT(0.0)) { + linop.m_a_scalar = RT(1.0); reset_alpha = true; } - const RT bovera = m_b_scalar/m_a_scalar; + const RT bovera = linop.m_b_scalar/linop.m_a_scalar; - for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) { + for (int amrlev = 0; amrlev < linop.NAMRLevels(); ++amrlev) { const int mglev = 0; - const Box& domain = this->m_geom[amrlev][mglev].Domain(); - const RT dxi = static_cast(this->m_geom[amrlev][mglev].InvCellSize(0)); - const RT dyi = static_cast((AMREX_SPACEDIM >= 2) ? this->m_geom[amrlev][mglev].InvCellSize(1) : Real(1.0)); - const RT dzi = static_cast((AMREX_SPACEDIM == 3) ? this->m_geom[amrlev][mglev].InvCellSize(2) : Real(1.0)); + const Box& domain = linop.Geom(amrlev,mglev).Domain(); + const RT dxi = static_cast(linop.Geom(amrlev,mglev).InvCellSize(0)); + const RT dyi = static_cast((AMREX_SPACEDIM >= 2) ? linop.Geom(amrlev,mglev).InvCellSize(1) : Real(1.0)); + const RT dzi = static_cast((AMREX_SPACEDIM == 3) ? 
linop.Geom(amrlev,mglev).InvCellSize(2) : Real(1.0)); if (reset_alpha) { - m_a_coeffs[amrlev][mglev].setVal(RT(0.0)); + linop.m_a_coeffs[amrlev][mglev].setVal(RT(0.0)); } MFItInfo mfi_info; @@ -505,20 +505,20 @@ MLABecLaplacianT::applyRobinBCTermsCoeffs () #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(m_a_coeffs[amrlev][mglev], mfi_info); mfi.isValid(); ++mfi) + for (MFIter mfi(linop.m_a_coeffs[amrlev][mglev], mfi_info); mfi.isValid(); ++mfi) { const Box& vbx = mfi.validbox(); - auto const& afab = m_a_coeffs[amrlev][mglev].array(mfi); + auto const& afab = linop.m_a_coeffs[amrlev][mglev].array(mfi); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - auto const& bfab = m_b_coeffs[amrlev][mglev][idim].const_array(mfi); + auto const& bfab = linop.m_b_coeffs[amrlev][mglev][idim].const_array(mfi); const Box& blo = amrex::adjCellLo(vbx,idim); const Box& bhi = amrex::adjCellHi(vbx,idim); bool outside_domain_lo = !(domain.contains(blo)); bool outside_domain_hi = !(domain.contains(bhi)); if ((!outside_domain_lo) && (!outside_domain_hi)) { continue; } for (int icomp = 0; icomp < ncomp; ++icomp) { - auto const& rbc = (*(this->m_robin_bcval[amrlev]))[mfi].const_array(icomp*3); - if (this->m_lobc_orig[icomp][idim] == LinOpBCType::Robin && outside_domain_lo) + auto const& rbc = (*(linop.m_robin_bcval[amrlev]))[mfi].const_array(icomp*3); + if (linop.m_lobc_orig[icomp][idim] == LinOpBCType::Robin && outside_domain_lo) { if (idim == 0) { RT fac = bovera*dxi*dxi; @@ -546,7 +546,7 @@ MLABecLaplacianT::applyRobinBCTermsCoeffs () }); } } - if (this->m_hibc_orig[icomp][idim] == LinOpBCType::Robin && outside_domain_hi) + if (linop.m_hibc_orig[icomp][idim] == LinOpBCType::Robin && outside_domain_hi) { if (idim == 0) { RT fac = bovera*dxi*dxi; @@ -579,6 +579,16 @@ MLABecLaplacianT::applyRobinBCTermsCoeffs () } } } +} // namespace detail + +template +void +MLABecLaplacianT::applyRobinBCTermsCoeffs () +{ + if (this->hasRobinBC()) { + detail::applyRobinBCTermsCoeffs(*this); + } +} template void diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index 175f34ae54b..d4b3718212a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -131,6 +131,8 @@ public: RT location; }; + Vector > m_robin_bcval; + protected: bool m_has_metric_term = false; @@ -182,8 +184,6 @@ protected: }; Vector > > m_bcondloc; - Vector > m_robin_bcval; - // used to save interpolation coefficients of the first interior cells mutable Vector> > m_undrrelxr; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.H index 6187c479e4c..448a5aaedf4 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.H @@ -114,6 +114,8 @@ public: void getEBFluxes (const Vector& a_flux, const Vector& a_sol) const override; + void applyRobinBCTermsCoeffs (); + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) [[nodiscard]] std::unique_ptr makeHypre (Hypre::Interface hypre_interface) const override; #endif @@ -122,6 +124,11 @@ public: [[nodiscard]] std::unique_ptr makePETSc () const override; #endif + Real m_a_scalar = std::numeric_limits::quiet_NaN(); + Real m_b_scalar = std::numeric_limits::quiet_NaN(); + Vector > m_a_coeffs; + Vector > > m_b_coeffs; + protected: int m_ncomp = 1; @@ -131,10 +138,6 @@ protected: Location m_beta_loc; // Location of coefficients: face centers or face centroids Location m_phi_loc; // Location of 
solution variable: cell centers or cell centroids - Real m_a_scalar = std::numeric_limits::quiet_NaN(); - Real m_b_scalar = std::numeric_limits::quiet_NaN(); - Vector > m_a_coeffs; - Vector > > m_b_coeffs; Vector > m_cc_mask; Vector > m_eb_phi; @@ -154,6 +157,8 @@ protected: const Vector& b_eb); void averageDownCoeffs (); void averageDownCoeffsToCoarseAmrLevel (int flev); + + [[nodiscard]] bool supportRobinBC () const noexcept override { return true; } }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index b37537645e6..247eeeea25c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -685,6 +685,8 @@ MLEBABecLap::prepareForSolve () MLCellABecLap::prepareForSolve(); + applyRobinBCTermsCoeffs(); + averageDownCoeffs(); if (m_eb_phi[0]) { @@ -1285,6 +1287,14 @@ MLEBABecLap::getEBFluxes (const Vector& a_flux, const VectorhasRobinBC()) { + detail::applyRobinBCTermsCoeffs(*this); + } +} + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) std::unique_ptr MLEBABecLap::makeHypre (Hypre::Interface hypre_interface) const diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index 706fe679d7e..a889f7514af 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -488,6 +488,22 @@ public: [[nodiscard]] bool isMFIterSafe (int amrlev, int mglev1, int mglev2) const; + //! Return the number of AMR levels + [[nodiscard]] int NAMRLevels () const noexcept { return m_num_amr_levels; } + + //! Return the number of MG levels at given AMR level + [[nodiscard]] int NMGLevels (int amrlev) const noexcept { return m_num_mg_levels[amrlev]; } + + [[nodiscard]] const Geometry& Geom (int amr_lev, int mglev=0) const noexcept { return m_geom[amr_lev][mglev]; } + + // BC + Vector > m_lobc; + Vector > m_hibc; + // Need to save the original copy because we change the BC type to + // Neumann for inhomogeneous Neumann and Robin. + Vector > m_lobc_orig; + Vector > m_hibc_orig; + protected: static constexpr int mg_coarsen_ratio = 2; @@ -544,14 +560,6 @@ protected: }; std::unique_ptr m_raii_comm; - // BC - Vector > m_lobc; - Vector > m_hibc; - // Need to save the original copy because we change the BC type to - // Neumann for inhomogeneous Neumann and Robin. - Vector > m_lobc_orig; - Vector > m_hibc_orig; - Array m_domain_bloc_lo {{AMREX_D_DECL(0._rt,0._rt,0._rt)}}; Array m_domain_bloc_hi {{AMREX_D_DECL(0._rt,0._rt,0._rt)}}; @@ -561,20 +569,12 @@ protected: const MF* m_coarse_data_for_bc = nullptr; MF m_coarse_data_for_bc_raii; - //! Return the number of AMR levels - [[nodiscard]] int NAMRLevels () const noexcept { return m_num_amr_levels; } - - //! Return the number of MG levels at given AMR level - [[nodiscard]] int NMGLevels (int amrlev) const noexcept { return m_num_mg_levels[amrlev]; } - //! Return AMR refinement ratios [[nodiscard]] const Vector& AMRRefRatio () const noexcept { return m_amr_ref_ratio; } //! 
Return AMR refinement ratio at given AMR level [[nodiscard]] int AMRRefRatio (int amr_lev) const noexcept { return m_amr_ref_ratio[amr_lev]; } - [[nodiscard]] const Geometry& Geom (int amr_lev, int mglev=0) const noexcept { return m_geom[amr_lev][mglev]; } - [[nodiscard]] FabFactory const* Factory (int amr_lev, int mglev=0) const noexcept { return m_factory[amr_lev][mglev].get(); } From 4b64003c80099627e9da856801517844f2bb21b8 Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Mon, 27 Nov 2023 15:58:46 -0600 Subject: [PATCH 046/186] Comments: correct some typos (#3641) ## Summary This PR fixes some typos and spelling errors in the comments that codespell seems to have missed. --- CHANGES | 6 +++--- Src/AmrCore/AMReX_FillPatcher.H | 8 ++++---- Src/AmrCore/AMReX_InterpFaceRegister.H | 4 ++-- Src/Base/AMReX_BCRec.H | 2 +- Src/Base/AMReX_FArrayBox.H | 2 +- Src/Base/AMReX_FabArray.H | 4 ++-- Src/Base/AMReX_MultiFab.H | 4 ++-- Src/Base/AMReX_MultiFabUtil.H | 2 +- Src/Base/AMReX_MultiFabUtil.cpp | 2 +- Src/Base/AMReX_iMultiFab.H | 4 ++-- Src/EB/AMReX_EB_utils.cpp | 2 +- Src/F_Interfaces/Base/AMReX_parallel_mod.F90 | 2 +- Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H | 10 +++++----- Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H | 2 +- Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 2 +- Src/LinearSolvers/MLMG/AMReX_MLMG.H | 2 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H | 8 ++++---- Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H | 2 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp | 2 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp | 2 +- 20 files changed, 36 insertions(+), 36 deletions(-) diff --git a/CHANGES b/CHANGES index a7a22fd97bd..f9f2d4febe8 100644 --- a/CHANGES +++ b/CHANGES @@ -72,7 +72,7 @@ -- We weren't defining cent_hat out far enough (#3548) - -- Add Fortran inteface for FillCoarsePatch for face variables (#3542) + -- Add Fortran interface for FillCoarsePatch for face variables (#3542) -- print_state/printCell: Make it work without managed memory (#3543) @@ -114,7 +114,7 @@ -- Simplify filterParticles Kernel (#3510) - -- Generatize particle-to-cell assignment function (#3499) + -- Generalize particle-to-cell assignment function (#3499) Follow-on to 3499 (#3514) ParticleLocator: Make Assignor optional template parameter (#3515) @@ -302,7 +302,7 @@ # 23.07 - -- Allow users to change the default vector growth stategy (#3389) + -- Allow users to change the default vector growth strategy (#3389) -- Communications arena implementation (#3388) diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H index 46d1107dea3..d36b3529efd 100644 --- a/Src/AmrCore/AMReX_FillPatcher.H +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -15,13 +15,13 @@ namespace amrex { * with interpolation of the coarse data. Then it fills the fine ghost * cells overlapping fine level valid cells with the fine level data. If * the valid cells of the destination need to be filled, it will be done as - * well. Finally, it will fill the physical bounbary using the user + * well. Finally, it will fill the physical boundary using the user * provided functor. The `fill` member function can be used to do the * operations just described. Alternatively, one can also use the * `fillCoarseFineBounary` to fill the ghost cells at the coarse/fine * boundary only. Then one can manually call FillBoundary to fill the other * ghost cells, and use the physical BC functor to handle the physical - * boundeary. + * boundary. 
* * The communication of the coarse data needed for spatial interpolation is * optimized at the cost of being error-prone. One must follow the @@ -42,7 +42,7 @@ namespace amrex { * * (3) When to destroy? Usually, we do time steppig on a coarse level * first. Then we recursively do time stepping on fine levels. After the - * finer level finishes, we do reflux and averge the fine data down to the + * finer level finishes, we do reflux and average the fine data down to the * coarse level. After that we should destroy the FillPatcher object * associated with these two levels, because the coarse data stored in the * object has become outdated. For AmrCore based codes, you could use @@ -118,7 +118,7 @@ public: * \param fbc for filling fine level physical BC * \param fbccomp starting component of the fine level BC functor * \param bcs BCRec specifying physical boundary types - * \parame bcscomp starting component of the BCRec Vector. + * \param bcscomp starting component of the BCRec Vector. * \param pre_interp optional pre-interpolation hook for modifying the coarse data * \param post_interp optional post-interpolation hook for modifying the fine data */ diff --git a/Src/AmrCore/AMReX_InterpFaceRegister.H b/Src/AmrCore/AMReX_InterpFaceRegister.H index 5e9f92784e7..a63c2c23e4c 100644 --- a/Src/AmrCore/AMReX_InterpFaceRegister.H +++ b/Src/AmrCore/AMReX_InterpFaceRegister.H @@ -10,7 +10,7 @@ namespace amrex { /** * \brief InterpFaceRegister is a coarse/fine boundary register for - * interpolation of face data at the coarse/fine boundadry. + * interpolation of face data at the coarse/fine boundary. */ class InterpFaceRegister @@ -31,7 +31,7 @@ public: Geometry const& fgeom, IntVect const& ref_ratio); /** - * \brief Defines an InterpFaceRegister objecct. + * \brief Defines an InterpFaceRegister object. * * \param fba The fine level BoxArray * \param fdm The fine level DistributionMapping diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H index c39634cfb09..268147a3a04 100644 --- a/Src/Base/AMReX_BCRec.H +++ b/Src/Base/AMReX_BCRec.H @@ -43,7 +43,7 @@ public: {} /* * \brief Yet another constructor. Inherits bndry types from bc_domain - * when bx lies on edge of domain otherwise gets interior Dirchlet. + * when bx lies on edge of domain otherwise gets interior Dirichlet. */ AMREX_GPU_HOST_DEVICE BCRec (const Box& bx, diff --git a/Src/Base/AMReX_FArrayBox.H b/Src/Base/AMReX_FArrayBox.H index 084b38ce46b..b6dc4e887c8 100644 --- a/Src/Base/AMReX_FArrayBox.H +++ b/Src/Base/AMReX_FArrayBox.H @@ -116,7 +116,7 @@ public: * \brief Pure virtual function. Derived classes MUST override this * function to skip over the next FAB f in the istream, under the * assumption that the header for the FAB f has already been - * skpped over. + * skipped over. */ virtual void skip (std::istream& is, FArrayBox& f) const = 0; diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index e507dab153b..96efc1f18fa 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -339,7 +339,7 @@ public: /** * \brief Construct an empty FabArray that has a default Arena. If - * `define` is called later with a nulltpr as MFInfo's arena, the + * `define` is called later with a nullptr as MFInfo's arena, the * default Arena `a` will be used. If the arena in MFInfo is not a * nullptr, the MFInfo's arena will be used. */ @@ -2827,7 +2827,7 @@ template void FabArray::shift (const IntVect& v) { - clearThisBD(); // The new boxarry will have a different ID. 
+ clearThisBD(); // The new boxarray will have a different ID. boxarray.shift(v); addThisBD(); #ifdef AMREX_USE_OMP diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index 416c4540da5..1a6c1d7f15f 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -50,7 +50,7 @@ public: /** * \brief Constructs an empty MultiFab. Data can be defined at a later * time using the define member functions inherited from FabArray. If - * `define` is called later with a nulltpr as MFInfo's arena, the default + * `define` is called later with a nullptr as MFInfo's arena, the default * Arena `a` will be used. If the arena in MFInfo is not a nullptr, the * MFInfo's arena will be used. */ @@ -60,7 +60,7 @@ public: * \brief * Constructs a MultiFab * \param bs a valid region - * \param dm a DistribuionMapping + * \param dm a DistributionMapping * \param ncomp number of components * \param ngrow number of cells the region grows * \param info MFInfo diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index ad1fa669f32..064b8a1b19f 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -375,7 +375,7 @@ namespace amrex void FillRandom (MultiFab& mf, int scomp, int ncomp); /** - * \brief Fill MultiFab with random numbers from nornmal distribution + * \brief Fill MultiFab with random numbers from normal distribution * * All cells including ghost cells are filled. * diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index 93ba453cc07..5520c6164d7 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -477,7 +477,7 @@ namespace amrex auto tmptype = type; tmptype.set(dir); if (dir >= AMREX_SPACEDIM || !tmptype.nodeCentered()) { - amrex::Abort("average_down_edges: not face index type"); + amrex::Abort("average_down_edges: not edge index type"); } const int ncomp = crse.nComp(); if (isMFIterSafe(fine, crse)) diff --git a/Src/Base/AMReX_iMultiFab.H b/Src/Base/AMReX_iMultiFab.H index 519ab9c82a5..eb1e350433a 100644 --- a/Src/Base/AMReX_iMultiFab.H +++ b/Src/Base/AMReX_iMultiFab.H @@ -24,7 +24,7 @@ namespace amrex { * member functions are defined for I/O and simple arithmetic operations on * these aggregate objects. * -8 This class does NOT provide a copy constructor or assignment operator. +* This class does NOT provide a copy constructor or assignment operator. */ class iMultiFab : @@ -42,7 +42,7 @@ public: /** * \brief Constructs an empty iMultiFab. Data can be defined at a later * time using the define member functions inherited from FabArray. If - * `define` is called later with a nulltpr as MFInfo's arena, the default + * `define` is called later with a nullptr as MFInfo's arena, the default * Arena `a` will be used. If the arena in MFInfo is not a nullptr, the * MFInfo's arena will be used. */ diff --git a/Src/EB/AMReX_EB_utils.cpp b/Src/EB/AMReX_EB_utils.cpp index 948a3d5db20..857a8eb08a8 100644 --- a/Src/EB/AMReX_EB_utils.cpp +++ b/Src/EB/AMReX_EB_utils.cpp @@ -31,7 +31,7 @@ facets_nearest_pt (IntVect const& ind_pt, IntVect const& ind_loop, RealVect cons RealVect const& eb_normal, RealVect const& eb_p0, GpuArray const& dx) { - // Enumerate the possible EB facet edges invovlved. + // Enumerate the possible EB facet edges involved. 
int n_facets = 0; IntVect ind_facets {AMREX_D_DECL(0, 0, 0)}; for (int d = 0; d < AMREX_SPACEDIM; ++d) { diff --git a/Src/F_Interfaces/Base/AMReX_parallel_mod.F90 b/Src/F_Interfaces/Base/AMReX_parallel_mod.F90 index 00fcc275c89..68cf647ff7f 100644 --- a/Src/F_Interfaces/Base/AMReX_parallel_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_parallel_mod.F90 @@ -71,7 +71,7 @@ subroutine amrex_parallel_init (comm) if (present(comm) .and. .not.flag) then if (comm .ne. MPI_COMM_WORLD) then - stop "MPI has not been initialized. How come we are given a communciator?" + stop "MPI has not been initialized. How come we are given a communicator?" endif end if diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H index 5c90a4e21ff..cc9c361e916 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H @@ -72,7 +72,7 @@ public: /** * Sets alpha as a scalar field to values from a single component - * mutlifab. + * multifab. * * \param [in] amrlev The level of the multifab for the solver, with * \p amrlev = 0 always being the lowest level in the @@ -88,12 +88,12 @@ public: /** * Sets alpha as a single scalar constant value across - * the mutlifab. + * the multifab. * * \param [in] amrlev The level of the multifab for the solver, with * \p amrlev = 0 always being the lowest level in the * AMR hierarchy represented in the solve. - * \param [in] alpha Single scalar value to populate across mutlifab. + * \param [in] alpha Single scalar value to populate across multifab. */ template , @@ -118,12 +118,12 @@ public: /** * Sets beta as a single scalar constant value across - * the mutlifabs (one for each dimension). + * the multifabs (one for each dimension). * * \param [in] amrlev The level of the multifab for the solver, with * \p amrlev = 0 always being the lowest level in the * AMR hierarchy represented in the solve. - * \param [in] beta Single scalar value to populate across mutlifabs. + * \param [in] beta Single scalar value to populate across multifabs. */ template , diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 0c4937d2de5..05b3250caf8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -18,7 +18,7 @@ namespace amrex { // where phi and rhs are nodal multifab, and sigma is a tensor constant // with only diagonal components. The EB is assumed to be Dirichlet. // -// del dot (simga grad phi) - alpha/r^2 phi = rhs, for RZ where alpha is a +// del dot (sigma grad phi) - alpha/r^2 phi = rhs, for RZ where alpha is a // scalar constant that is zero by default. 
class MLEBNodeFDLaplacian diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index a889f7514af..b8aa71eebd0 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -287,7 +287,7 @@ public: * \param famrlev fine AMR level * \param fine fine level data * \param crse coarse level data - * \parame nghost number of ghost cells + * \param nghost number of ghost cells */ virtual void interpolationAmr (int famrlev, MF& fine, const MF& crse, IntVect const& nghost) const = 0; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index f1fed2d3db3..70e7e121486 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -1370,7 +1370,7 @@ MLMGT::mgFcycle () } } -// At the true bottom of the coarset AMR level. +// At the true bottom of the coarsest AMR level. // in : Residual (res) as b // out : Correction (cor) as x template diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H index d1ae9e0b7ed..32c75224e78 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H @@ -2406,7 +2406,7 @@ void mlndlap_fillijmat_aa_cpu (Box const& ndbx, Real f2xmy = Real(2.0)*facx - facy; Real fmx2y = Real(2.0)*facy - facx; - // Note that ccdom has been grown at peridoci boundaries. + // Note that ccdom has been grown at periodic boundaries. const Box& nddom = amrex::surroundingNodes(ccdom); constexpr auto gidmax = std::numeric_limits::max(); @@ -2552,7 +2552,7 @@ void mlndlap_fillijmat_ha_cpu (Box const& ndbx, Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; - // Note that ccdom has been grown at peridoci boundaries. + // Note that ccdom has been grown at periodic boundaries. const Box& nddom = amrex::surroundingNodes(ccdom); constexpr auto gidmax = std::numeric_limits::max(); @@ -2708,7 +2708,7 @@ void mlndlap_fillijmat_cs_cpu (Box const& ndbx, Real f2xmy = Real(2.0)*facx - facy; Real fmx2y = Real(2.0)*facy - facx; - // Note that ccdom has been grown at peridoci boundaries. + // Note that ccdom has been grown at periodic boundaries. const Box& nddom = amrex::surroundingNodes(ccdom); constexpr auto gidmax = std::numeric_limits::max(); @@ -3355,7 +3355,7 @@ void mlndlap_fillijmat_cs_gpu (const int ps, const int i, const int j, const int fp = fm = Real(0.0); } - // Note that nddom has been grown at peridoci boundaries. + // Note that nddom has been grown at periodic boundaries. const Box& nddom = amrex::surroundingNodes(ccdom); constexpr auto gidmax = std::numeric_limits::max(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H index adbf00da231..bb2ec8dd4f5 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H @@ -6,7 +6,7 @@ namespace amrex { -// del dot (sigma grah phi) = rhs +// del dot (sigma grad phi) = rhs // where phi and rhs are nodal, and sigma is cell-centered. 
class MLNodeLaplacian diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp index 4a749a1ed08..d96e183af4d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp @@ -947,7 +947,7 @@ MLNodeLaplacian::checkPoint (std::string const& file_name) const HeaderFile.precision(17); - // MLLinop stuff + // MLLinOp stuff HeaderFile << "verbose = " << verbose << "\n" << "nlevs = " << NAMRLevels() << "\n" << "do_agglomeration = " << info.do_agglomeration << "\n" diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index 0fb9e2ba33b..41b52833e2c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -234,7 +234,7 @@ void MLNodeLinOp_set_dot_mask (MultiFab& dot_mask, iMultiFab const& omask, Geome Box nddomain = amrex::surroundingNodes(geom.Domain()); if (strategy != MLNodeLinOp::CoarseningStrategy::Sigma) { - nddomain.grow(1000); // hack to avoid masks being modified at Neuman boundary + nddomain.grow(1000); // hack to avoid masks being modified at Neumann boundary } #ifdef AMREX_USE_OMP From 4b8dd0373188ca6505e3a9dfaaafbaf4b7fe5ffe Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Tue, 28 Nov 2023 19:07:39 -0600 Subject: [PATCH 047/186] Comments on FAB-related classes and methods (#3636) New or updated comments for various FAB-related classes and methods. Descriptions are tweaked to match standard comment conventions and make IntelliSense happy. For example, some descriptions get moved to be just before their respective class declarations. --- Src/Base/AMReX_BaseFab.H | 96 ++++++++++++------------- Src/Base/AMReX_BoxArray.H | 12 ++-- Src/Base/AMReX_FArrayBox.H | 2 - Src/Base/AMReX_FabArray.H | 120 ++++++++++++++++---------------- Src/Base/AMReX_FabArrayBase.H | 6 ++ Src/Base/AMReX_FabConv.H | 86 +++++++++++------------ Src/Base/AMReX_MultiFab.H | 98 ++++++++++++++------------ Src/Base/AMReX_MultiFabUtil.H | 42 +++++++---- Src/Base/AMReX_MultiFabUtil.cpp | 3 - 9 files changed, 243 insertions(+), 222 deletions(-) diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index 99132038397..006d7639ad6 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -90,54 +90,6 @@ makeArray4 (T* p, Box const& bx, int ncomp) noexcept return Array4{p, amrex::begin(bx), amrex::end(bx), ncomp}; } -/** -* \brief A Fortran Array-like Object -* BaseFab emulates the Fortran array concept. -* Useful operations can be performed upon -* BaseFabs in C++, and they provide a convenient interface to -* Fortran when it is necessary to retreat into that language. - -* BaseFab is a template class. Through use of the -* template, a BaseFab may be based upon any class. So far at least, -* most applications have been based upon simple types like integers, -* real*4s, or real*8s. Most applications do not use BaseFabs -* directly, but utilize specialized classes derived from BaseFab. - -* Classes derived from BaseFab include FArrayBox, IArrayBox, TagBox, -* Mask, EBFArrayBox, EBCellFlag and CutFab. - -* BaseFab objects depend on the dimensionality of space -* (indirectly through the DOMAIN Box member). It is -* typical to define the macro SPACEDIM to be 1, 2, or 3 to indicate -* the dimension of space. See the discussion of class Box for more -* information. A BaseFab contains a Box DOMAIN, which indicates the -* integer indexing space over which the array is defined. 
A BaseFab -* also has NVAR components. By components, we mean that for each -* point in the rectangular indexing space, there are NVAR values -* associated with that point. A Fortran array corresponding to a -* BaseFab would have (SPACEDIM+1) dimensions. - -* By design, the array layout in a BaseFab mirrors that of a -* Fortran array. The first index (x direction for example) varies -* most rapidly, the next index (y direction), if any, varies next -* fastest. The component index varies last, after all the spatial -* indices. - -* It is sometimes convenient to be able to treat a sub-array within an -* existing BaseFab as a BaseFab in its own right. This is often -* referred to as aliasing the BaseFab. Note that when aliasing is -* used, the BaseFabs domain will not, in general, be the same as the -* parent BaseFabs domain, nor will the number of components. -* BaseFab is a dimension dependent class, so SPACEDIM must be -* defined as either 1, 2, or 3 when compiling. - -* This is NOT a polymorphic class. - -* It does NOT provide a copy constructor or assignment operator. - -* T MUST have a default constructor and an assignment operator. -*/ - template typename std::enable_if::value>::type placementNew (T* const /*ptr*/, Long /*n*/) @@ -178,6 +130,54 @@ placementDelete (T* const ptr, Long n) }); } +/** + * \brief A FortranArrayBox(FAB)-like object + * + * BaseFab emulates the Fortran array concept. + * Useful operations can be performed upon + * BaseFabs in C++, and they provide a convenient interface to + * Fortran when it is necessary to retreat into that language. + * + * BaseFab is a template class. Through use of the + * template, a BaseFab may be based upon any class. So far at least, + * most applications have been based upon simple types like integers, + * real*4s, or real*8s. Most applications do not use BaseFabs + * directly, but utilize specialized classes derived from BaseFab. + * + * Classes derived from BaseFab include FArrayBox, IArrayBox, TagBox, + * Mask, EBFArrayBox, EBCellFlag and CutFab. + * + * BaseFab objects depend on the dimensionality of space + * (indirectly through the DOMAIN Box member). It is + * typical to define the macro SPACEDIM to be 1, 2, or 3 to indicate + * the dimension of space. See the discussion of class Box for more + * information. A BaseFab contains a Box DOMAIN, which indicates the + * integer indexing space over which the array is defined. A BaseFab + * also has NVAR components. By components, we mean that for each + * point in the rectangular indexing space, there are NVAR values + * associated with that point. A Fortran array corresponding to a + * BaseFab would have (SPACEDIM+1) dimensions. + * + * By design, the array layout in a BaseFab mirrors that of a + * Fortran array. The first index (x direction for example) varies + * most rapidly, the next index (y direction), if any, varies next + * fastest. The component index varies last, after all the spatial + * indices. + * + * It is sometimes convenient to be able to treat a sub-array within an + * existing BaseFab as a BaseFab in its own right. This is often + * referred to as aliasing the BaseFab. Note that when aliasing is + * used, the BaseFabs domain will not, in general, be the same as the + * parent BaseFabs domain, nor will the number of components. + * BaseFab is a dimension dependent class, so SPACEDIM must be + * defined as either 1, 2, or 3 when compiling. + * + * This is NOT a polymorphic class. + * + * It does NOT provide a copy constructor or assignment operator. 
+ * + * \tparam T MUST have a default constructor and an assignment operator. + */ template class BaseFab : public DataAllocator diff --git a/Src/Base/AMReX_BoxArray.H b/Src/Base/AMReX_BoxArray.H index 807cd9d8516..94358f580a4 100644 --- a/Src/Base/AMReX_BoxArray.H +++ b/Src/Base/AMReX_BoxArray.H @@ -515,16 +515,16 @@ struct BATransformer // for backward compatibility using BndryBATransformer = BATransformer; -/** -* \brief A collection of Boxes stored in an Array. It is a -* reference-counted concrete class, not a polymorphic one; i.e. you -* cannot use any of the List member functions with a BoxList. -*/ - class MFIter; class AmrMesh; class FabArrayBase; +/** + * \brief A collection of Boxes stored in an Array. + * + * It is a reference-counted concrete class, not a polymorphic one; i.e. you + * cannot use any of the List member functions with a BoxList. + */ class BoxArray { public: diff --git a/Src/Base/AMReX_FArrayBox.H b/Src/Base/AMReX_FArrayBox.H index b6dc4e887c8..2dda4e6b0a0 100644 --- a/Src/Base/AMReX_FArrayBox.H +++ b/Src/Base/AMReX_FArrayBox.H @@ -26,7 +26,6 @@ class FArrayBox; * primarily for FArrayBox implementers; i.e. user's shouldn't * call any of the member functions in this class directly. */ - class FABio // NOLINT(cppcoreguidelines-special-member-functions) { public: @@ -224,7 +223,6 @@ private: * This class does NOT provide a copy constructor or assignment operator, * but it has a move constructor. */ - class FArrayBox : public BaseFab diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index 96efc1f18fa..56300d4cfef 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -56,64 +56,11 @@ Long nBytesOwned (T const&) noexcept { return 0; } template Long nBytesOwned (BaseFab const& fab) noexcept { return fab.nBytesOwned(); } -/* - A Collection of Fortran Array-like Objects - - - The FabArray class implements a collection (stored as an array) of - Fortran array-like objects. The parameterized type FAB is intended to be - any class derived from BaseFab. For example, FAB may be a BaseFab of - integers, so we could write: - - FabArray > int_fabs; - - Then int_fabs is a FabArray that can hold a collection of BaseFab - objects. - - FabArray is not just a general container class for Fortran arrays. It is - intended to hold "grid" data for use in finite difference calculations in - which the data is defined on a union of (usually disjoint) rectangular - regions embedded in a uniform index space. This region, called the valid - region, is represented by a BoxArray. For the purposes of this discussion, - the Kth Box in the BoxArray represents the interior region of the Kth grid. - - Since the intent is to be used with finite difference calculations a - FabArray also includes the notion of a boundary region for each grid. The - boundary region is specified by the ngrow parameter which tells the FabArray - to allocate each FAB to be ngrow cells larger in all directions than the - underlying Box. The larger region covered by the union of all the FABs is - called the region of definition. The underlying notion is that the valid - region contains the grid interior data and the region of definition includes - the interior region plus the boundary areas. - - Operations are available to copy data from the valid regions into these - boundary areas where the two overlap. 
The number of components, that is, - the number of values that can be stored in each cell of a FAB, is either - given as an argument to the constructor or is inherent in the definition of - the underlying FAB. Each FAB in the FabArray will have the same number of - components. - - In summary, a FabArray is an array of FABs. The Kth element contains a FAB - that holds the data for the Kth grid, a Box that defines the valid region - of the Kth grid. - - A typical use for a FabArray would be to hold the solution vector or - right-hand-side when solving a linear system of equations on a union of - rectangular grids. The copy operations would be used to copy data from the - valid regions of neighboring grids into the boundary regions after each - relaxation step of the iterative method. If a multigrid method is used, a - FabArray could be used to hold the data at each level in the multigrid - hierarchy. - - This class is a concrete class not a polymorphic one. - - This class does NOT provide a copy constructor or assignment operator. -*/ - -// -// alloc: allocate memory or not -// +/** + * \brief FabArray memory allocation information + */ struct MFInfo { + // alloc: allocate memory or not bool alloc = true; Arena* arena = nullptr; Vector tags; @@ -314,6 +261,60 @@ Add (FabArray& dst, FabArray const& src, int srccomp, int dstcomp, int } } +/** + * \brief An Array of FortranArrayBox(FAB)-like Objects + * + * The FabArray class implements a collection (stored as an array) of + * Fortran array box-like ( \p FAB ) objects. The parameterized type \p FAB is intended to be + * any class derived from BaseFab. For example, \p FAB may be a BaseFab of + * integers, so we could write: + * + * FabArray > int_fabs; + * + * Then int_fabs is a FabArray that can hold a collection of BaseFab + * objects. + * + * FabArray is not just a general container class for Fortran arrays. It is + * intended to hold "grid" data for use in finite difference calculations in + * which the data is defined on a union of (usually disjoint) rectangular + * regions embedded in a uniform index space. This region, called the valid + * region, is represented by a BoxArray. For the purposes of this discussion, + * the Kth Box in the BoxArray represents the interior region of the Kth grid. + * + * Since the intent is to be used with finite difference calculations a + * FabArray also includes the notion of a boundary region for each grid. The + * boundary region is specified by the ngrow parameter which tells the FabArray + * to allocate each \p FAB to be ngrow cells larger in all directions than the + * underlying Box. The larger region covered by the union of all the \p FABs is + * called the region of definition. The underlying notion is that the valid + * region contains the grid interior data and the region of definition includes + * the interior region plus the boundary areas. + * + * Operations are available to copy data from the valid regions into these + * boundary areas where the two overlap. The number of components, that is, + * the number of values that can be stored in each cell of a \p FAB, is either + * given as an argument to the constructor or is inherent in the definition of + * the underlying \p FAB. Each \p FAB in the FabArray will have the same number of + * components. + * + * In summary, a FabArray is an array of \p FABs. The Kth element contains a \p FAB + * that holds the data for the Kth grid, a Box that defines the valid region + * of the Kth grid. 
+ * + * A typical use for a FabArray would be to hold the solution vector or + * right-hand-side when solving a linear system of equations on a union of + * rectangular grids. The copy operations would be used to copy data from the + * valid regions of neighboring grids into the boundary regions after each + * relaxation step of the iterative method. If a multigrid method is used, a + * FabArray could be used to hold the data at each level in the multigrid + * hierarchy. + * + * This class is a concrete class not a polymorphic one. + * + * This class does NOT provide a copy constructor or assignment operator. + * + * \tparam FAB FortranArrayBox-like object. Typically a derived class of BaseFab. Not to be confused with FabArrayBase. + */ template class FabArray : @@ -338,8 +339,9 @@ public: FabArray () noexcept; /** - * \brief Construct an empty FabArray that has a default Arena. If - * `define` is called later with a nullptr as MFInfo's arena, the + * \brief Construct an empty FabArray that has a default Arena. + * + * If `define` is called later with a nullptr as MFInfo's arena, the * default Arena `a` will be used. If the arena in MFInfo is not a * nullptr, the MFInfo's arena will be used. */ diff --git a/Src/Base/AMReX_FabArrayBase.H b/Src/Base/AMReX_FabArrayBase.H index 29d3d63b29e..d8bc4411874 100644 --- a/Src/Base/AMReX_FabArrayBase.H +++ b/Src/Base/AMReX_FabArrayBase.H @@ -28,6 +28,12 @@ template class FabArray; namespace EB2 { class IndexSpace; } +/** + * \brief Base class for FabArray. + * + * Not to be confused with FArrayBox or `FAB` shorthands. + * Can be read as FArrayBox-like Array Base. + */ class FabArrayBase { friend class MFIter; diff --git a/Src/Base/AMReX_FabConv.H b/Src/Base/AMReX_FabConv.H index 78554000f25..25dae063de7 100644 --- a/Src/Base/AMReX_FabConv.H +++ b/Src/Base/AMReX_FabConv.H @@ -13,20 +13,18 @@ namespace amrex { -// -// A Descriptor of the Long Integer type - /** -* This class is meant to hold all information needed to completely -* describe the "int" or "Long" type on a machine. To describe an integer both -* the number of bytes and their ordering, relative to canonical -* ordering 1 .. sizeof(Long), needs to be specified. -* This allows us to write out integers in the native format on a machine, -* and then by also saving the IntDescriptor, we can read them back in on -* another machine and have enough information to construct the exact same -* values. -*/ - + * \brief A Descriptor of the Long Integer type + * + * This class is meant to hold all information needed to completely + * describe the "int" or "Long" type on a machine. To describe an integer both + * the number of bytes and their ordering, relative to canonical + * ordering 1 .. sizeof(Long), needs to be specified. + * This allows us to write out integers in the native format on a machine, + * and then by also saving the IntDescriptor, we can read them back in on + * another machine and have enough information to construct the exact same + * values. + */ class IntDescriptor { @@ -72,39 +70,37 @@ std::ostream& operator<< (std::ostream& os, const IntDescriptor& id); //! std::istream& operator>> (std::istream& is, IntDescriptor& id); - - //A Descriptor of the Real Type - /** -* \brief This class is meant to hold all information needed to completely -* describe the "Real" floating-point type on a machine. 
By "Real" here we -* mean either the "float" or "double" type that this version of AMReX -* was built with, which corresponds to whether BL_USE_FLOAT or -* BL_USE_DOUBLE was used to build the version of the library. -* -* To describe a "Real" type two arrays are needed: one detailing the ordering -* of the bytes in the Real, relative to the canonical ordering -* 1 .. sizeof(Real) and the other detailing the format of the floating-point -* number. -* -* The array detailing the format of a floating-point number is an eight-element -* array of longs containing the following information: -* -* format[0] = number of bits per number -* format[1] = number of bits in exponent -* format[2] = number of bits in mantissa -* format[3] = start bit of sign -* format[4] = start bit of exponent -* format[5] = start bit of mantissa -* format[6] = high order mantissa bit (CRAY needs this) -* format[7] = bias of exponent -* -* This allows us to write out "Real"s in the native format on a machine, -* and then by also saving the IntDescriptor, we can read them back in on -* another machine and have enough information to construct the exact same -* "Real" values, provided the Reals have the same size on the two machines. -*/ - + * \brief A Descriptor of the Real Type + * + * This class is meant to hold all information needed to completely + * describe the "Real" floating-point type on a machine. By "Real" here we + * mean either the "float" or "double" type that this version of AMReX + * was built with, which corresponds to whether BL_USE_FLOAT or + * BL_USE_DOUBLE was used to build the version of the library. + * + * To describe a "Real" type two arrays are needed: one detailing the ordering + * of the bytes in the Real, relative to the canonical ordering + * 1 .. sizeof(Real) and the other detailing the format of the floating-point + * number. + * + * The array detailing the format of a floating-point number is an eight-element + * array of longs containing the following information: + * + * format[0] = number of bits per number + * format[1] = number of bits in exponent + * format[2] = number of bits in mantissa + * format[3] = start bit of sign + * format[4] = start bit of exponent + * format[5] = start bit of mantissa + * format[6] = high order mantissa bit (CRAY needs this) + * format[7] = bias of exponent + * + * This allows us to write out "Real"s in the native format on a machine, + * and then by also saving the IntDescriptor, we can read them back in on + * another machine and have enough information to construct the exact same + * "Real" values, provided the Reals have the same size on the two machines. + */ class RealDescriptor { public: diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index 1a6c1d7f15f..fdf2e67cbd8 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -24,16 +24,17 @@ using fMultiFab = FabArray >; class iMultiFab; /** - * \brief - * A collection (stored as an array) of FArrayBox objects. + * \brief A collection (stored as an array) of FArrayBox objects. + * * This class is useful for storing floating point data on a domain defined by * a union of rectangular regions embedded in a uniform index space. * MultiFab class extends the function of the underlying FabArray class just * as the FArrayBox class extends the function of BaseFab. - * Additional member functions are defined for I/O and simple arithmetic operations on these aggregate objects. 
+ * Additional member functions are defined for I/O and simple arithmetic + * operations on these aggregate objects. + * * This class does NOT provide a copy constructor or assignment operator. */ - class MultiFab : public FabArray @@ -41,34 +42,36 @@ class MultiFab public: /** - * \brief Constructs an empty MultiFab. Data can be defined at a later - * time using the define member functions inherited - * from FabArray. + * \brief Constructs an empty MultiFab. + * + * Data can be defined at a later time using the define member functions + * inherited from FabArray. */ MultiFab () noexcept; /** - * \brief Constructs an empty MultiFab. Data can be defined at a later - * time using the define member functions inherited from FabArray. If - * `define` is called later with a nullptr as MFInfo's arena, the default - * Arena `a` will be used. If the arena in MFInfo is not a nullptr, the - * MFInfo's arena will be used. + * \brief Constructs an empty MultiFab. + * + * Data can be defined at a later time using the define member functions. + * If `define` is called later with a nullptr as MFInfo's arena, the + * default Arena `a` will be used. If the arena in MFInfo is not a + * nullptr, the MFInfo's arena will be used. */ explicit MultiFab (Arena* a) noexcept; /** - * \brief - * Constructs a MultiFab - * \param bs a valid region - * \param dm a DistributionMapping + * \brief Constructs a MultiFab + * + * The size of the FArrayBox is given by the Box grown by \p ngrow, and + * the number of components is given by \p ncomp. If \p info is set to + * not allocating memory, then no FArrayBoxes are allocated at + * this time but can be defined later. + * + * \param bxs a valid region + * \param dm a DistribuionMapping * \param ncomp number of components * \param ngrow number of cells the region grows * \param info MFInfo - - * The size of the FArrayBox is given by the Box grown by ngrow, and - * the number of components is given by ncomp. If info is set to - * not allocating memory, then no FArrayBoxes are allocated at - * this time but can be defined later. */ MultiFab (const BoxArray& bxs, const DistributionMapping& dm, @@ -95,10 +98,11 @@ public: #endif /** - * \brief Make an alias MultiFab. maketype must be - * amrex::make_alias. scomp is the starting component of the - * alias and ncomp is the number of components in the new aliasing - * MultiFab. + * \brief Make an alias MultiFab. + * + * Note that \p maketype must be `amrex::make_alias`, + * \p scomp is the starting component of the alias, and + * \p ncomp is the number of components in the new aliasing MultiFab. */ MultiFab (const MultiFab& rhs, MakeType maketype, int scomp, int ncomp); @@ -135,11 +139,13 @@ public: #endif MultiFab& operator= (Real r); - // + /** - * \brief Returns the minimum value contained in component comp of the - * MultiFab. The parameter nghost determines the number of - * boundary cells to search for the minimum. The default is to + * \brief Returns the minimum value contained in component \p comp of the + * MultiFab. + * + * The parameter \p nghost determines the number of + * boundary cells to search for the minimum. The default is to * search only the valid regions of the FArrayBoxes. */ [[nodiscard]] Real min (int comp, @@ -154,16 +160,18 @@ public: int nghost = 0, bool local = false) const; /** - * \brief Returns the maximum value contained in component comp of the - * MultiFab. The parameter nghost determines the number of - * boundary cells to search for the maximum. 
The default is to + * \brief Returns the maximum value contained in component \p comp of the + * MultiFab. + * + * The parameter \p nghost determines the number of + * boundary cells to search for the maximum. The default is to * search only the valid regions of the FArrayBoxes. */ [[nodiscard]] Real max (int comp, int nghost = 0, bool local = false) const; /** - * \brief Identical to the previous max() function, but confines its + * \brief Identical to the previous `max()` function, but confines its * search to intersection of Box b and the MultiFab. */ [[nodiscard]] Real max (const Box& region, @@ -191,7 +199,7 @@ public: /** * \brief Returns the maximum *absolute* values contained in - * each component of "comps" of the MultiFab. "nghost" ghost cells are used. + * each component of \p comps of the MultiFab. \p nghost ghost cells are used. */ [[nodiscard]] Vector norm0 (const Vector& comps, int nghost = 0, bool local = false, bool ignore_covered = false ) const; [[nodiscard]] Vector norminf (const Vector& comps, int nghost = 0, bool local = false, bool ignore_covered = false) const { @@ -199,13 +207,14 @@ public: } /** - * \brief Returns the L1 norm of component "comp" over the MultiFab. + * \brief Returns the L1 norm of component \p comp over the MultiFab. + * * No ghost cells are used. This version has no double counting for nodal data. */ [[nodiscard]] Real norm1 (int comp, const Periodicity& period, bool ignore_covered = false) const; /** - * \brief Returns the L1 norm of component "comp" over the MultiFab. - * ngrow ghost cells are used. + * \brief Returns the L1 norm of component \p comp over the MultiFab. + * \p ngrow ghost cells are used. */ [[nodiscard]] Real norm1 (int comp = 0, int ngrow = 0, bool local = false) const; /** @@ -214,12 +223,12 @@ public: */ [[nodiscard]] Vector norm1 (const Vector& comps, int ngrow = 0, bool local = false) const; /** - * \brief Returns the L2 norm of component "comp" over the MultiFab. + * \brief Returns the L2 norm of component \p comp over the MultiFab. * No ghost cells are used. */ [[nodiscard]] Real norm2 (int comp = 0) const; /** - * \brief Returns the L2 norm of component "comp" over the MultiFab. + * \brief Returns the L2 norm of component \p comp over the MultiFab. * No ghost cells are used. This version has no double counting for nodal data. */ [[nodiscard]] Real norm2 (int comp, const Periodicity& period) const; @@ -236,16 +245,17 @@ public: using FabArray::sum; /** - * \brief Same as sum with local=false, but for non-cell-centered data, this - * skips non-unique points that are owned by multiple boxes. + * \brief Same as sum with \p local =false, but for non-cell-centered data, this + * skips non-unique points that are owned by multiple boxes. */ [[nodiscard]] Real sum_unique (int comp = 0, bool local = false, const Periodicity& period = Periodicity::NonPeriodic()) const; /** - * \brief Adds the scalar value val to the value of each cell in the - * specified subregion of the MultiFab. The subregion consists - * of the num_comp components starting at component comp. + * \brief Adds the scalar value \p val to the value of each cell in the + * specified subregion of the MultiFab. + * + * The subregion consists of the \p num_comp components starting at component \p comp. * The value of nghost specifies the number of cells in the * boundary region of each FArrayBox in the subregion that should * be modified. 
diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index 064b8a1b19f..29af89ba88e 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -19,67 +19,76 @@ namespace amrex const MultiFab& nd, int scomp, int ncomp, int ngrow = 0); - //! Average edge-based MultiFab onto cell-centered MultiFab. This fills in - //! ngrow ghost cells in the cell-centered MultiFab. Both cell centered and - //! edge centered MultiFabs need to have ngrow ghost values + /** + * \brief Average edge-based MultiFab onto cell-centered MultiFab. + * + * This fills in \p ngrow ghost cells in the cell-centered MultiFab. Both cell centered and + * edge centered MultiFabs need to have \p ngrow ghost values. + */ void average_edge_to_cellcenter (MultiFab& cc, int dcomp, const Vector& edge, int ngrow = 0); - //! Average face-based MultiFab onto cell-centered MultiFab. void average_face_to_cellcenter (MultiFab& cc, int dcomp, const Vector& fc, int ngrow = 0); - + //! Average face-based FabArray onto cell-centered FabArray. template && IsFabArray_v, int> = 0> void average_face_to_cellcenter (CMF& cc, int dcomp, const Array& fc, int ngrow = 0); - + //! Average face-based MultiFab onto cell-centered MultiFab with geometric weighting. void average_face_to_cellcenter (MultiFab& cc, const Vector& fc, const Geometry& geom); + //! Average face-based MultiFab onto cell-centered MultiFab with geometric weighting. void average_face_to_cellcenter (MultiFab& cc, const Array& fc, const Geometry& geom); - - //! Average cell-centered MultiFab onto face-based MultiFab. + //! Average cell-centered MultiFab onto face-based MultiFab with geometric weighting. void average_cellcenter_to_face (const Vector& fc, const MultiFab& cc, const Geometry& geom, int ncomp = 1, bool use_harmonic_averaging = false); + //! Average cell-centered MultiFab onto face-based MultiFab with geometric weighting. void average_cellcenter_to_face (const Array& fc, const MultiFab& cc, const Geometry& geom, int ncomp = 1, bool use_harmonic_averaging = false); - //! Average fine face-based MultiFab onto crse face-based MultiFab. + //! Average fine face-based FabArray onto crse face-based FabArray. template ::value,int> = 0> void average_down_faces (const Vector& fine, const Vector& crse, const IntVect& ratio, int ngcrse = 0); + //! Average fine face-based FabArray onto crse face-based FabArray. template ::value,int> = 0> void average_down_faces (const Vector& fine, const Vector& crse, int ratio, int ngcrse = 0); + //! Average fine face-based FabArray onto crse face-based FabArray. template ::value,int> = 0> void average_down_faces (const Array& fine, const Array& crse, const IntVect& ratio, int ngcrse = 0); + //! Average fine face-based FabArray onto crse face-based FabArray. template ::value,int> = 0> void average_down_faces (const Array& fine, const Array& crse, int ratio, int ngcrse = 0); - //! This version does average down for one direction. - //! It uses the IndexType of MultiFabs to determine the direction. - //! It is expected that one direction is nodal and the rest are cell-centered. + /** + * \brief This version does average down for one face direction. + * + * It uses the IndexType of MultiFabs to determine the direction. + * It is expected that one direction is nodal and the rest are cell-centered. + */ template void average_down_faces (const FabArray& fine, FabArray& crse, const IntVect& ratio, int ngcrse=0); @@ -117,9 +126,12 @@ namespace amrex int ngcrse = 0, bool mfiter_is_definitely_safe=false); - //! 
Average fine cell-based MultiFab onto crse cell-centered MultiFab using - //! volume-weighting. This routine DOES NOT assume that the crse BoxArray is - //! a coarsened version of the fine BoxArray. + /** + * \brief Volume weighed average of fine MultiFab onto coarse MultiFab. + * + * Both MultiFabs are assumed to be cell-centered. This routine DOES NOT assume that + * the crse BoxArray is a coarsened version of the fine BoxArray. + */ void average_down (const MultiFab& S_fine, MultiFab& S_crse, const Geometry& fgeom, const Geometry& cgeom, int scomp, int ncomp, const IntVect& ratio); diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index 5520c6164d7..27efbffca0f 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -308,9 +308,6 @@ namespace amrex // ************************************************************************************************************* - // Average fine cell-based MultiFab onto crse cell-centered MultiFab. - // We do NOT assume that the coarse layout is a coarsened version of the fine layout. - // This version DOES use volume-weighting. void average_down (const MultiFab& S_fine, MultiFab& S_crse, const Geometry& fgeom, const Geometry& cgeom, int scomp, int ncomp, int rr) From 44c5c7a5da23c1a5e7e899612b1df696cea7e4fe Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 29 Nov 2023 15:07:44 -0800 Subject: [PATCH 048/186] When checking for periodic outs on GPU, copy full particle data (#3646) Fixes a bug in mapped coordinates when GPUs are enabled. The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_ParticleUtil.H | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index d4b13030c04..5430cd34037 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -487,11 +487,7 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, CellAssignor&& ass } else { - amrex::Particle<0> p_prime; - AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);, - p_prime.pos(1) = src_data.pos(1, i+this_offset);, - p_prime.pos(2) = src_data.pos(2, i+this_offset);); - + auto p_prime = src_data.getSuperParticle(i+this_offset); enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per); auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor); assigned_grid = amrex::get<0>(tup_prime); From 0cd1385bde604099e5822d636f85d724d74d756e Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 29 Nov 2023 15:41:35 -0800 Subject: [PATCH 049/186] Fix: nosmt OMP Threads Default (#3647) ## Summary Fix that `OMP_NUM_THREADS` was ignored in non-verbose runs. 
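In condensed form, the corrected precedence for `amrex.omp_threads = nosmt` is sketched below. This is an illustrative fragment only, mirroring the change to `Src/Base/AMReX_OpenMP.cpp` in the diff further down: an explicitly set `OMP_NUM_THREADS` always wins, and the unique-physical-core count is used only when the variable is absent.

```cpp
// Sketch of the fixed "nosmt" branch (not a standalone program).
char const *env_omp_num_threads = std::getenv("OMP_NUM_THREADS");
if (env_omp_num_threads == nullptr) {
    // No explicit user request: avoid SMT by using one thread per
    // unique physical core.
    omp_set_num_threads(numUniquePhysicalCores());
}
else if (amrex::system::verbose > 1) {
    // OMP_NUM_THREADS takes precedence; optionally report that it is kept.
    amrex::Print() << "amrex.omp_threads was set to nosmt, "
                   << "but OMP_NUM_THREADS was set. Will keep "
                   << "OMP_NUM_THREADS=" << env_omp_num_threads << ".\n";
}
```
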
## Additional background Follow-up to #3607 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_OpenMP.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Src/Base/AMReX_OpenMP.cpp b/Src/Base/AMReX_OpenMP.cpp index 5ddd9944411..c0c33ce962f 100644 --- a/Src/Base/AMReX_OpenMP.cpp +++ b/Src/Base/AMReX_OpenMP.cpp @@ -152,12 +152,13 @@ namespace amrex::OpenMP // default or OMP_NUM_THREADS environment variable } else if (omp_threads == "nosmt") { char const *env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); - if (env_omp_num_threads != nullptr && amrex::system::verbose > 1) { + if (env_omp_num_threads == nullptr) { + omp_set_num_threads(numUniquePhysicalCores()); + } + else if (amrex::system::verbose > 1) { amrex::Print() << "amrex.omp_threads was set to nosmt," << "but OMP_NUM_THREADS was set. Will keep " << "OMP_NUM_THREADS=" << env_omp_num_threads << ".\n"; - } else { - omp_set_num_threads(numUniquePhysicalCores()); } } else { std::optional num_omp_threads = to_int(omp_threads); From 9b733ec45cd93a80234a7c98248b6eb4816589d5 Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Wed, 29 Nov 2023 17:43:44 -0600 Subject: [PATCH 050/186] solve_cg: avoid use of MF `z` (#3637) ## Summary The method `solve_cg` is modified so that the MF `z` is no longer used, thus saving on memory usage. ## Additional background This is similar to the PR that changed `solve_bicgstab` that avoided use of `s`. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 4768f06fc41..3afa56ee245 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -262,7 +262,6 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) MF sorig = Lp.make(amrlev, mglev, nghost); MF r = Lp.make(amrlev, mglev, nghost); - MF z = Lp.make(amrlev, mglev, nghost); MF q = Lp.make(amrlev, mglev, nghost); sorig.LocalCopy(sol,0,0,ncomp,nghost); @@ -295,9 +294,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) for (; iter <= maxiter; ++iter) { - z.LocalCopy(r,0,0,ncomp,nghost); - - RT rho = dotxy(z,r); + RT rho = dotxy(r,r); if ( rho == 0 ) { @@ -305,12 +302,12 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) } if (iter == 1) { - p.LocalCopy(z,0,0,ncomp,nghost); + p.LocalCopy(r,0,0,ncomp,nghost); } else { RT beta = rho/rho_1; - MF::Xpay(p, beta, z, 0, 0, ncomp, nghost); // p = z + beta * p + MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p } Lp.apply(amrlev, mglev, q, p, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); From 2f47fa7361bbf5793503cfb31b717bece889bde0 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 1 Dec 2023 09:54:52 -0800 Subject: [PATCH 051/186] Update CHANGES for 23.12 (#3651) --- CHANGES | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/CHANGES b/CHANGES index f9f2d4febe8..82b28a03e87 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,44 @@ +# 23.12 + + -- solve_cg: avoid use of MF `z` (#3637) + + -- Fix: nosmt OMP Threads Default (#3647) + `amrex.omp_threads`: Can 
Avoid SMT (#3607) + + -- When checking for periodic outs on GPU, copy full particle data (#3646) + + -- MLEBABecLap: Support Robin BC at Domain Boundaries (#3617) + + -- Ascent: SoA Particle Support (#3350) + + -- solve_bicgstab: use fewer MFs (#3635) + + -- solve_bicgstab: cut use of `s` (#3629) + + -- Bug fix for amrex::Subtract when called with interger nghost (#3634) + + -- Fix typo in `MLMGT::getGradSolution` when `MF` is different from `AMF` (#3631) + + -- SUNDIALS: Use sunrealtype instead of realtype (#3632) + + -- SYCL: Use get_multi_ptr instead of get_pointer (#3630) + + -- Plotfile Tools: GPU support (#3626) + + -- solve_cg: use linop.make instead of MF constructor (#3627) + + -- CArena: shrink_in_place and operator<< (#3621) + + -- solve_bicgstab: use linop.make instead of MF constructor (#3619) + + -- replace AMREX_DEVICE_COMPILE with AMREX_IF_ON_DEVICE and AMREX_IF_ON_HOST (#3591) + + -- [Breaking] Prefix `amrex_` to each plotfile Tool (#3600) + + -- FillRandom: Use MKL host API (#3536) + + -- use hipPointerAttribute_t.type as HIP is removing hipPointerAttribute_t.memoryType (#3610) + # 23.11 -- Give FlashFluxRegisters ways to accumulate data in registers (#3597) From eb388ae90a7534b32c67f0410cd595a39bc362f9 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Sat, 2 Dec 2023 14:45:29 -0800 Subject: [PATCH 052/186] Work around compiler bug in nvcc 12.2 by using functor instead of lambda (#3653) This fixes an issue seen in ERF on garuda. The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_ParticleIO.H | 38 ++++++++++++--------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index ea24b1e5f81..e6969c9b1bd 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -32,6 +32,14 @@ ParticleContainer_impl + AMREX_GPU_HOST_DEVICE + int operator() (const P& p) const { + return p.id() > 0; + } +}; + template class Allocator, class CellAssignor> void @@ -77,10 +85,7 @@ ParticleContainer_impl int - { - return p.id() > 0; - }, true); + FilterPositiveID{}, true); } template 0; - }); + FilterPositiveID{}); } template 0; - }); + FilterPositiveID{}); } template 0; - }); + FilterPositiveID{}); } template 0; - }); + FilterPositiveID{}); } template 0; - }); + FilterPositiveID{}); } template Date: Sat, 2 Dec 2023 14:45:59 -0800 Subject: [PATCH 053/186] Disable CodeQL scheduled jobs on forks (#3649) --- .github/workflows/codeql.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 0e03e384633..f240930bd8b 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -14,6 +14,7 @@ concurrency: jobs: analyze: + if: ${{ github.repository == 'AMReX-Codes/amrex' || github.event_name != 'schedule' }} name: Analyze runs-on: ubuntu-latest permissions: From 72c333d39ce9a7ba2e2bec85b2d9ef4f5230ca9b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 2 Dec 2023 14:46:30 -0800 Subject: [PATCH 054/186] Robustify the Cache Cleanup Scripts (#3650) Make changes to handle the cases where the workflow names contain spaces. Note that none of the workflow names in amrex has spaces. 
--- .github/workflows/cleanup-cache-postpr.yml | 5 ++++- .github/workflows/cleanup-cache.yml | 13 ++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cleanup-cache-postpr.yml b/.github/workflows/cleanup-cache-postpr.yml index 73d6eaf0903..978e9c28f04 100644 --- a/.github/workflows/cleanup-cache-postpr.yml +++ b/.github/workflows/cleanup-cache-postpr.yml @@ -31,7 +31,10 @@ jobs: set +e keys=$(gh actions-cache list -L 100 -R $REPO -B $BRANCH | cut -f 1) + # $keys might contain spaces. Thus we set IFS to \n. + IFS=$'\n' for k in $keys do - gh actions-cache delete $k -R $REPO -B $BRANCH --confirm + gh actions-cache delete "$k" -R $REPO -B $BRANCH --confirm done + unset IFS diff --git a/.github/workflows/cleanup-cache.yml b/.github/workflows/cleanup-cache.yml index f224ace0ef3..cafae38bf1c 100644 --- a/.github/workflows/cleanup-cache.yml +++ b/.github/workflows/cleanup-cache.yml @@ -27,7 +27,7 @@ jobs: EVENT=${{ github.event.workflow_run.event }} # Triggering workflow run name (e.g., LinuxClang) - WORKFLOW_NAME=${{ github.event.workflow_run.name }} + WORKFLOW_NAME="${{ github.event.workflow_run.name }}" if [[ $EVENT == "pull_request" ]]; then gh run download ${{ github.event.workflow_run.id }} -n pr_number @@ -45,16 +45,19 @@ jobs: # The goal is to keep the last used key of each job and delete all others. # something like ccache-LinuxClang- - keyprefix=ccache-${WORKFLOW_NAME}- + keyprefix="ccache-${WORKFLOW_NAME}-" - cached_jobs=$(gh actions-cache list -L 100 -R $REPO -B $BRANCH --key $keyprefix | awk -F '-git-' '{print $1}' | sort | uniq) + cached_jobs=$(gh actions-cache list -L 100 -R $REPO -B $BRANCH --key "$keyprefix" | awk -F '-git-' '{print $1}' | sort | uniq) # cached_jobs is something like "ccache-LinuxClang-configure-1d ccache-LinuxClang-configure-2d". + # It might also contain spaces. Thus we set IFS to \n. + IFS=$'\n' for j in $cached_jobs do - old_keys=$(gh actions-cache list -L 100 -R $REPO -B $BRANCH --key ${j}-git- --sort last-used | cut -f 1 | tail -n +2) + old_keys=$(gh actions-cache list -L 100 -R $REPO -B $BRANCH --key "${j}-git-" --sort last-used | cut -f 1 | tail -n +2) for k in $old_keys do - gh actions-cache delete $k -R $REPO -B $BRANCH --confirm + gh actions-cache delete "$k" -R $REPO -B $BRANCH --confirm done done + unset IFS From 2f1b1d1206eec72dbbc364c5c95c5e515f4fe479 Mon Sep 17 00:00:00 2001 From: Candace Gilet Date: Sat, 2 Dec 2023 17:47:38 -0500 Subject: [PATCH 055/186] Clarify documentation on setEBDirchlet() and fix link to AMReX-Hydro (#3652) ## Summary ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Docs/sphinx_documentation/source/LinearSolvers.rst | 9 ++++++--- Docs/sphinx_documentation/source/conf.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst b/Docs/sphinx_documentation/source/LinearSolvers.rst index e7266f02243..fb7138e1f4d 100644 --- a/Docs/sphinx_documentation/source/LinearSolvers.rst +++ b/Docs/sphinx_documentation/source/LinearSolvers.rst @@ -483,7 +483,9 @@ To set homogeneous Dirichlet boundary conditions, call ml_ebabeclap->setEBHomogDirichlet(lev, coeff); where coeff can be a real number (i.e. 
the value is the same at every cell) -or is the MultiFab holding the coefficient of the gradient at each cell with an EB face. +or a MultiFab holding the coefficient of the gradient at each cell with an EB face. +In other words, coeff is :math:`\beta` in the canonical form given in equation :eq:`eqn::abeclap` +located at the EB surface centroid. To set inhomogeneous Dirichlet boundary conditions, call @@ -494,8 +496,9 @@ To set inhomogeneous Dirichlet boundary conditions, call ml_ebabeclap->setEBDirichlet(lev, phi_on_eb, coeff); where phi_on_eb is the MultiFab holding the Dirichlet values in every cut cell, -and coeff again is a real number (i.e. the value is the same at every cell) -or a MultiFab holding the coefficient of the gradient at each cell with an EB face. +and coeff again is a real number +or a MultiFab holding the coefficient of the gradient at each cell with an EB face, +i.e. :math:`\beta` in equation :eq:`eqn::abeclap` located at the EB surface centroid. Currently there are options to define the face-based coefficients on face centers vs face centroids, and to interpret the solution variable diff --git a/Docs/sphinx_documentation/source/conf.py b/Docs/sphinx_documentation/source/conf.py index dc29ab6e041..8cb17c78e1f 100644 --- a/Docs/sphinx_documentation/source/conf.py +++ b/Docs/sphinx_documentation/source/conf.py @@ -42,7 +42,7 @@ def get_amrex_version(): intersphinx_mapping = { 'amrex_tutorials': ('https://amrex-codes.github.io/amrex/tutorials_html/', None), - 'amrex_hydro':('https://amrex-codes.github.io/amrex/hydro_html/', None) + 'amrex_hydro':('https://amrex-fluids.github.io/amrex-hydro/docs_html/', None) } # Add any paths that contain templates here, relative to this directory. From efd77ffab39c4e32e2b9e4a4fd446a49e22f0c9a Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Sat, 2 Dec 2023 16:51:48 -0800 Subject: [PATCH 056/186] two separate fixes -- particle_compare and ref_ratio=1 (#3655) ## Summary ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [X] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/AmrCore/AMReX_AmrMesh.cpp | 2 +- .../Postprocessing/C_Src/particle_compare.cpp | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Src/AmrCore/AMReX_AmrMesh.cpp b/Src/AmrCore/AMReX_AmrMesh.cpp index efccf318315..c0e191d79e4 100644 --- a/Src/AmrCore/AMReX_AmrMesh.cpp +++ b/Src/AmrCore/AMReX_AmrMesh.cpp @@ -1082,7 +1082,7 @@ AmrMesh::checkInput () for (int i = 0; i < max_level; i++) { if (MaxRefRatio(i) < 2) { - amrex::Error("Amr::checkInput: bad ref_ratios"); + amrex::Warning("Amr::checkInput: ref_ratios all equal to one!"); } } diff --git a/Tools/Postprocessing/C_Src/particle_compare.cpp b/Tools/Postprocessing/C_Src/particle_compare.cpp index 9967625cca6..e9efc69cf62 100644 --- a/Tools/Postprocessing/C_Src/particle_compare.cpp +++ b/Tools/Postprocessing/C_Src/particle_compare.cpp @@ -438,12 +438,15 @@ int main_main() std::string fn2; std::string pt; Real rtol = 0.0; + Real atol = 0.0; int farg=1; while (farg <= narg) { const std::string fname = amrex::get_command_argument(farg); if (fname == "-r" || fname == "--rel_tol") { rtol = std::stod(amrex::get_command_argument(++farg)); + } else if (fname == "--abs_tol") { + atol = std::stod(amrex::get_command_argument(++farg)); } 
else { break; } @@ -472,6 +475,7 @@ int main_main() << "\n" << " optional arguments:\n" << " -r|--rel_tol rtol : relative tolerance (default is 0)\n" + << " --abs_tol atol : absolute tolerance (default is 0)\n" << std::endl; return EXIT_SUCCESS; } @@ -555,16 +559,25 @@ int main_main() int exit_code = 0; for (int i = 0; i < header1.num_comp; ++i) { - if (global_norms[i+header1.num_comp] > rtol) exit_code = 1; + if (global_norms[i ] > atol && + global_norms[i+header1.num_comp] > rtol) exit_code = 1; } if (exit_code == 0) { - amrex::Print() << " PARTICLES AGREE to relative tolerance " << rtol << "\n"; + if (atol > 0.) { + amrex::Print() << " PARTICLES AGREE to relative tolerance " << rtol << " and/or absolute tolerance " << atol << "\n"; + } else { + amrex::Print() << " PARTICLES AGREE to relative tolerance " << rtol << "\n"; + } } else { - amrex::Print() << " PARTICLES DISAGREE to relative tolerance " << rtol << "\n"; + if (atol > 0.) { + amrex::Print() << " PARTICLES DISAGREE to relative tolerance " << rtol << " and/or absolute tolerance " << atol << "\n"; + } else { + amrex::Print() << " PARTICLES DISAGREE to relative tolerance " << rtol << "\n"; + } } return exit_code; From edb4c25027efbbc465c88d453441dcd7115d8651 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 6 Dec 2023 09:44:58 -0800 Subject: [PATCH 057/186] GNU Make: Fix name collision for aurora (#3656) --- Tools/GNUMake/Make.machines | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/GNUMake/Make.machines b/Tools/GNUMake/Make.machines index 1e6293dd001..db91bb99947 100644 --- a/Tools/GNUMake/Make.machines +++ b/Tools/GNUMake/Make.machines @@ -123,9 +123,11 @@ ifeq ($(findstring asterix, $(host_name)), asterix) endif ifeq ($(findstring aurora, $(host_name)), aurora) +ifneq ($(findstring alcf.anl.gov, $(host_name)),alcf.anl.gov) which_site := hs which_computer := aurora endif +endif ifeq ($(findstring kestrel, $(NREL_CLUSTER)), kestrel) which_site := nrel From ccd635716e70b70baf65459897b3fcb01dd0dc49 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 8 Dec 2023 11:00:29 -0800 Subject: [PATCH 058/186] Fix a typo in doxygen for NonLocalBC::FillBoundary (#3658) --- Src/Base/AMReX_NonLocalBC.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H index f7f22a67198..315081a48b7 100644 --- a/Src/Base/AMReX_NonLocalBC.H +++ b/Src/Base/AMReX_NonLocalBC.H @@ -1129,7 +1129,7 @@ FillBoundary_finish (CommHandler handler, auto cmd = makeFillBoundaryMetaData(mf, mf.nGrowVect, geom, dtos); // The metadata cmd can be cached and reused on a MultiFab/FabArray with // the same BoxArray and DistributionMapping. - FillBoundary_finish(mf, cmd, scomp, ncomp, dtos, proj); + FillBoundary(mf, cmd, scomp, ncomp, dtos, proj); \endverbatim * * The FillBoundary capability here is more flexible than FabArray's From 43d71da32fa49d9d92f87f1f3e29fd67eb6952bb Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 8 Dec 2023 11:25:59 -0800 Subject: [PATCH 059/186] Limit the scope of gpu_rand_generator (#3659) Move gpu_rand_generator from namespace amrex to anonymous namespace. The variable is only used in AMReX_Random.cpp, so there are no reasons for it to be in the amrex namespace. 
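For reference, the resulting pattern is sketched below (illustrative only; in the actual file the definitions are wrapped in `AMREX_USE_SYCL` preprocessor guards, as the diff shows). Moving a file-local global into an unnamed namespace gives it internal linkage, so the symbol can no longer be named from, or collide with, other translation units, while the symbols other files rely on stay in `namespace amrex`.

```cpp
// Sketch of the pattern applied in AMReX_Random.cpp (SYCL/CUDA guards elided).
namespace amrex {
    // Used by other translation units, so it keeps external linkage in amrex.
    amrex::randState_t* gpu_rand_state = nullptr;
}

namespace {
    // Only referenced within AMReX_Random.cpp; the unnamed namespace gives it
    // internal linkage and keeps it out of the public amrex namespace.
    amrex::randGenerator_t gpu_rand_generator = nullptr;
}
```
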
--- Src/Base/AMReX_Random.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_Random.cpp b/Src/Base/AMReX_Random.cpp index cc791a11fef..506ec582544 100644 --- a/Src/Base/AMReX_Random.cpp +++ b/Src/Base/AMReX_Random.cpp @@ -19,9 +19,15 @@ namespace namespace amrex { #ifdef AMREX_USE_SYCL sycl_rng_descr* rand_engine_descr = nullptr; - oneapi::mkl::rng::philox4x32x10* gpu_rand_generator = nullptr; #else amrex::randState_t* gpu_rand_state = nullptr; +#endif +} + +namespace { +#ifdef AMREX_USE_SYCL + oneapi::mkl::rng::philox4x32x10* gpu_rand_generator = nullptr; +#else amrex::randGenerator_t gpu_rand_generator = nullptr; #endif } From d884f44493c0b0dcd15660aa64fb42a1bdbce0e5 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 11 Dec 2023 11:48:26 -0800 Subject: [PATCH 060/186] Pure SoA Particle: Separate Array for IdCPU (#3585) ## Summary This addresses a regression we see when moving to pure SoA particles: - slightly slower read/write to Ids when needed, e.g., for sorting - issues going up to the full 64bit range ## Additional background Once finished, this will close #3569. ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Andrew Myers --- Src/Particle/AMReX_ParticleContainerI.H | 77 +++++++++++++---- Src/Particle/AMReX_ParticleIO.H | 14 ++- Src/Particle/AMReX_ParticleInit.H | 28 +++++- Src/Particle/AMReX_ParticleTile.H | 91 +++++++++++++++----- Src/Particle/AMReX_ParticleTransformation.H | 6 +- Src/Particle/AMReX_StructOfArrays.H | 24 +++++- Src/Particle/AMReX_WriteBinaryParticleData.H | 16 ++-- Tests/Particles/RedistributeSOA/main.cpp | 12 +++ 8 files changed, 212 insertions(+), 56 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index b845d130d5c..9dd1d39b953 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -18,10 +18,10 @@ ParticleContainer_impl; @@ -1530,7 +1530,7 @@ ParticleContainer_implnumParticles(); ParticleLocData pld; - if constexpr(!ParticleType::is_soa_particle){ + if constexpr (!ParticleType::is_soa_particle){ if (npart != 0) { Long last = npart - 1; @@ -1647,7 +1647,7 @@ ParticleContainer_impl >& not_ours, Particle p; - if constexpr (!ParticleType::is_soa_particle) { - std::memcpy(&p, pbuf, sizeof(ParticleType)); - } else { + if constexpr (ParticleType::is_soa_particle) { + std::memcpy(&p.m_idcpu, pbuf, sizeof(uint64_t)); + ParticleReal pos[AMREX_SPACEDIM]; - std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal)); + std::memcpy(&pos[0], pbuf + sizeof(uint64_t), AMREX_SPACEDIM*sizeof(ParticleReal)); AMREX_D_TERM(p.pos(0) = pos[0];, p.pos(1) = pos[1];, p.pos(2) = pos[2]); - - int idcpu[2]; - std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int)); - - p.id() = idcpu[0]; - p.cpu() = idcpu[1]; + } else { + std::memcpy(&p, pbuf, sizeof(ParticleType)); } bool success = Where(p, pld, lev_min, lev_max, 0); @@ -2097,7 +2114,12 @@ RedistributeMPI (std::map >& not_ours, rcv_tile[ipart])]; char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; - if constexpr(! 
ParticleType::is_soa_particle) { + if constexpr (ParticleType::is_soa_particle) { + uint64_t idcpudata; + std::memcpy(&idcpudata, pbuf, sizeof(uint64_t)); + pbuf += sizeof(uint64_t); + ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata); + } else { ParticleType p; std::memcpy(&p, pbuf, sizeof(ParticleType)); pbuf += sizeof(ParticleType); @@ -2146,6 +2168,10 @@ RedistributeMPI (std::map >& not_ours, host_int_attribs.reserve(15); host_int_attribs.resize(finestLevel()+1); + Vector, Gpu::HostVector > > host_idcpu; + host_idcpu.reserve(15); + host_idcpu.resize(finestLevel()+1); + ipart = 0; for (int i = 0; i < nrcvs; ++i) { @@ -2159,7 +2185,15 @@ RedistributeMPI (std::map >& not_ours, char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; - if constexpr(! ParticleType::is_soa_particle) { + host_real_attribs[lev][ind].resize(NumRealComps()); + host_int_attribs[lev][ind].resize(NumIntComps()); + + if constexpr (ParticleType::is_soa_particle) { + uint64_t idcpudata; + std::memcpy(&idcpudata, pbuf, sizeof(uint64_t)); + pbuf += sizeof(uint64_t); + host_idcpu[lev][ind].push_back(idcpudata); + } else { ParticleType p; std::memcpy(&p, pbuf, sizeof(ParticleType)); pbuf += sizeof(ParticleType); @@ -2210,7 +2244,12 @@ RedistributeMPI (std::map >& not_ours, auto new_size = old_size + src_tile.size(); dst_tile.resize(new_size); - if constexpr(! ParticleType::is_soa_particle) { + if constexpr (ParticleType::is_soa_particle) { + Gpu::copyAsync(Gpu::hostToDevice, + host_idcpu[host_lev][std::make_pair(grid,tile)].begin(), + host_idcpu[host_lev][std::make_pair(grid,tile)].end(), + dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size); + } else { Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(), dst_tile.GetArrayOfStructs().begin() + old_size); diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index e6969c9b1bd..a10f9973a01 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -954,6 +954,10 @@ ParticleContainer_impl, Gpu::HostVector > > host_idcpu; + host_idcpu.reserve(15); + host_idcpu.resize(finestLevel()+1); + for (int i = 0; i < cnt; i++) { // note: for pure SoA particle layouts, we do write the id, cpu and positions as a struct // for backwards compatibility with readers @@ -1021,8 +1025,7 @@ ParticleContainer_impl, Gpu::HostVector > > host_idcpu; + host_idcpu.reserve(15); + host_idcpu.resize(finestLevel()+1); + for (Long j = 0; j < icount; j++) { Particle<0, 0> ptest; @@ -1117,8 +1121,9 @@ InitRandom (Long icount, host_real_attribs[pld.m_lev][ind][i].push_back(pos[j*AMREX_SPACEDIM+i]); } - host_int_attribs[pld.m_lev][ind][0].push_back(ParticleType::NextID()); - host_int_attribs[pld.m_lev][ind][1].push_back(MyProc); + host_idcpu[pld.m_lev][ind].push_back(0); + ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID(); + ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc(); host_particles[pld.m_lev][ind]; @@ -1157,6 +1162,11 @@ InitRandom (Long icount, { Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(), dst_tile.GetArrayOfStructs().begin() + old_size); + } else { + Gpu::copyAsync(Gpu::hostToDevice, + host_idcpu[host_lev][std::make_pair(grid,tile)].begin(), + host_idcpu[host_lev][std::make_pair(grid,tile)].end(), + dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size); } for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation) @@ -1201,6 +1211,10 @@ InitRandom (Long icount, 
host_int_attribs.reserve(15); host_int_attribs.resize(finestLevel()+1); + Vector, Gpu::HostVector > > host_idcpu; + host_idcpu.reserve(15); + host_idcpu.resize(finestLevel()+1); + for (Long icnt = 0; icnt < M; icnt++) { Particle<0, 0> ptest; for (int i = 0; i < AMREX_SPACEDIM; i++) { @@ -1261,8 +1275,9 @@ InitRandom (Long icount, host_real_attribs[pld.m_lev][ind][i].push_back(ptest.pos(i)); } - host_int_attribs[pld.m_lev][ind][0].push_back(ptest.id()); - host_int_attribs[pld.m_lev][ind][1].push_back(ptest.cpu()); + host_idcpu[pld.m_lev][ind].push_back(0); + ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID(); + ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc(); host_particles[pld.m_lev][ind]; @@ -1300,6 +1315,11 @@ InitRandom (Long icount, { Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(), dst_tile.GetArrayOfStructs().begin() + old_size); + } else { + Gpu::copyAsync(Gpu::hostToDevice, + host_idcpu[host_lev][std::make_pair(grid,tile)].begin(), + host_idcpu[host_lev][std::make_pair(grid,tile)].end(), + dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size); } for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index e35af847ec4..2b60b37d304 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -43,6 +43,7 @@ struct ParticleTileData ParticleType* AMREX_RESTRICT m_aos; + uint64_t* m_idcpu; GpuArray m_rdata; GpuArray m_idata; @@ -67,7 +68,7 @@ struct ParticleTileData if constexpr(!ParticleType::is_soa_particle) { return this->m_aos[index].id(); } else { - return this->m_idata[0][index]; + return ParticleIDWrapper(this->m_idcpu[index]); } } @@ -77,7 +78,17 @@ struct ParticleTileData if constexpr(!ParticleType::is_soa_particle) { return this->m_aos[index].cpu(); } else { - return this->m_idata[1][index]; + return ParticleCPUWrapper(this->m_idcpu[index]); + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) idcpu (const int index) const & + { + if constexpr(ParticleType::is_soa_particle) { + return this->m_idcpu[index]; + } else { + amrex::Abort("not implemented"); } } @@ -112,6 +123,9 @@ struct ParticleTileData if constexpr (!ParticleType::is_soa_particle) { memcpy(dst, m_aos + src_index, sizeof(ParticleType)); dst += sizeof(ParticleType); + } else { + memcpy(dst, m_idcpu + src_index, sizeof(uint64_t)); + dst += sizeof(uint64_t); } int array_start_index = AMREX_SPACEDIM + NStructReal; for (int i = 0; i < NAR; ++i) @@ -160,6 +174,9 @@ struct ParticleTileData if constexpr (!ParticleType::is_soa_particle) { memcpy(m_aos + dst_index, src, sizeof(ParticleType)); src += sizeof(ParticleType); + } else { + memcpy(m_idcpu + dst_index, src, sizeof(uint64_t)); + src += sizeof(uint64_t); } int array_start_index = AMREX_SPACEDIM + NStructReal; for (int i = 0; i < NAR; ++i) @@ -231,9 +248,8 @@ struct ParticleTileData { AMREX_ASSERT(index < m_size); SuperParticleType sp; + sp.m_idcpu = m_idcpu[index]; for (int i = 0; i < AMREX_SPACEDIM; ++i) {sp.pos(i) = m_rdata[i][index];} - sp.id() = m_idata[0][index]; - sp.cpu() = m_idata[1][index]; for (int i = 0; i < NAR; ++i) { sp.rdata(i) = m_rdata[i][index]; } @@ -270,6 +286,7 @@ struct ParticleTileData AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void setSuperParticle (const SuperParticleType& sp, int index) const noexcept { + m_idcpu[index] = sp.m_idcpu; for (int i = 0; i < NAR; ++i) { m_rdata[i][index] = 
sp.rdata(i); } @@ -303,10 +320,10 @@ struct ConstSoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - int cpu () const { return this->m_constparticle_tile_data.m_idata[1][m_index]; } + ConstParticleCPUWrapper cpu () const { return this->m_constparticle_tile_data.m_idcpu[m_index]; } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - int id () const { return this->m_constparticle_tile_data.m_idata[0][m_index]; } + ConstParticleIDWrapper id () const { return this->m_constparticle_tile_data.m_idcpu[m_index]; } //functions to get positions of the particle in the SOA data @@ -366,16 +383,22 @@ struct SoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - int& cpu () & { return this->m_particle_tile_data.m_idata[1][m_index]; } + ParticleCPUWrapper cpu () & { return this->m_particle_tile_data.m_idcpu[m_index]; } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ParticleIDWrapper id () & { return this->m_particle_tile_data.m_idcpu[m_index]; } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - int& id () & { return this->m_particle_tile_data.m_idata[0][m_index]; } + uint64_t& idcpu () & { return this->m_particle_tile_data.m_idcpu[m_index]; } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const int& cpu () const & { return this->m_particle_tile_data.m_idata[1][m_index]; } + ConstParticleCPUWrapper cpu () const & { return this->m_particle_tile_data.m_idcpu[m_index]; } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const int& id () const & { return this->m_particle_tile_data.m_idata[0][m_index]; } + ConstParticleIDWrapper id () const & { return this->m_particle_tile_data.m_idcpu[m_index]; } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const uint64_t& idcpu () const & { return this->m_particle_tile_data.m_idcpu[m_index]; } //functions to get positions of the particle in the SOA data @@ -477,6 +500,7 @@ struct ConstParticleTileData Long m_size; const ParticleType* AMREX_RESTRICT m_aos; + const uint64_t* m_idcpu; GpuArray m_rdata; GpuArray m_idata; @@ -496,7 +520,7 @@ struct ConstParticleTileData if constexpr(!ParticleType::is_soa_particle) { return this->m_aos[index].id(); } else { - return this->m_idata[0][index]; + return ConstParticleIDWrapper(this->m_idcpu[index]); } } @@ -506,7 +530,17 @@ struct ConstParticleTileData if constexpr(!ParticleType::is_soa_particle) { return this->m_aos[index].cpu(); } else { - return this->m_idata[1][index]; + return ConstParticleCPUWrapper(this->m_idcpu[index]); + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) idcpu (const int index) const & + { + if constexpr(ParticleType::is_soa_particle) { + return this->m_idcpu[index]; + } else { + amrex::Abort("not implemented"); } } @@ -546,6 +580,9 @@ struct ConstParticleTileData if constexpr (!ParticleType::is_soa_particle) { memcpy(dst, m_aos + src_index, sizeof(ParticleType)); dst += sizeof(ParticleType); + } else { + memcpy(dst, m_idcpu + src_index, sizeof(uint64_t)); + dst += sizeof(uint64_t); } int array_start_index = AMREX_SPACEDIM + NStructReal; for (int i = 0; i < NArrayReal; ++i) @@ -622,8 +659,7 @@ struct ConstParticleTileData AMREX_ASSERT(index < m_size); SuperParticleType sp; for (int i = 0; i < AMREX_SPACEDIM; ++i) {sp.pos(i) = m_rdata[i][index];} - sp.id() = m_idata[0][index]; - sp.cpu() = m_idata[1][index]; + sp.m_idcpu = m_idcpu[index]; for (int i = 0; i < NAR; 
++i) { sp.rdata(i) = m_rdata[i][index]; } @@ -663,7 +699,10 @@ struct ParticleTile ArrayOfStructs>::type; //using ParticleVector = typename AoS::ParticleVector; - using SoA = StructOfArrays; + using SoA = typename std::conditional< + ParticleType::is_soa_particle, + StructOfArrays, + StructOfArrays>::type; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; using StorageParticleType = typename ParticleType::StorageParticleType; @@ -688,7 +727,7 @@ struct ParticleTile if constexpr (!ParticleType::is_soa_particle) { return m_aos_tile[index].id(); } else { - return m_soa_tile.GetIntData(0)[index]; + return ParticleIDWrapper(m_soa_tile.GetIdCPUData()[index]); } } @@ -697,7 +736,7 @@ struct ParticleTile if constexpr (!ParticleType::is_soa_particle) { return m_aos_tile[index].id(); } else { - return m_soa_tile.GetIntData(0)[index]; + return ConstParticleIDWrapper(m_soa_tile.GetIdCPUData()[index]); } } @@ -706,7 +745,7 @@ struct ParticleTile if constexpr (!ParticleType::is_soa_particle) { return m_aos_tile[index].cpu(); } else { - return m_soa_tile.GetIntData(1)[index]; + return ParticleCPUWrapper(m_soa_tile.GetIdCPUData()[index]); } } @@ -715,7 +754,7 @@ struct ParticleTile if constexpr (!ParticleType::is_soa_particle) { return m_aos_tile[index].cpu(); } else { - return m_soa_tile.GetIntData(1)[index]; + return ConstParticleCPUWrapper(m_soa_tile.GetIdCPUData()[index]); } } @@ -873,7 +912,9 @@ struct ParticleTile } m_soa_tile.resize(np+1); - + if constexpr (!ParticleType::is_soa_particle) { + m_soa_tile.GetIdCPUData()[np] = sp.m_idcpu; + } auto& arr_rdata = m_soa_tile.GetRealData(); auto& arr_idata = m_soa_tile.GetIntData(); for (int i = 0; i < NArrayReal; ++i) { @@ -1092,6 +1133,11 @@ struct ParticleTile } else { ptd.m_aos = nullptr; } + if constexpr (ParticleType::is_soa_particle) { + ptd.m_idcpu = m_soa_tile.GetIdCPUData().dataPtr(); + } else { + ptd.m_idcpu = nullptr; + } if constexpr(NArrayReal > 0) { for (int i = 0; i < NArrayReal; ++i) { ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); @@ -1157,6 +1203,11 @@ struct ParticleTile } else { ptd.m_aos = nullptr; } + if constexpr (ParticleType::is_soa_particle) { + ptd.m_idcpu = m_soa_tile.GetIdCPUData().dataPtr(); + } else { + ptd.m_idcpu = nullptr; + } if constexpr(NArrayReal > 0) { for (int i = 0; i < NArrayReal; ++i) { ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index 28ccfa84a97..aa737455ce6 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -35,7 +35,11 @@ void copyParticle (const ParticleTileData& dst, AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); AMREX_ASSERT(dst.m_num_runtime_int == src.m_num_runtime_int ); - dst.m_aos[dst_i] = src.m_aos[src_i]; + if constexpr(!T_ParticleType::is_soa_particle) { + dst.m_aos[dst_i] = src.m_aos[src_i]; + } else { + dst.m_idcpu[dst_i] = src.m_idcpu[src_i]; + } if constexpr(NAR > 0) { for (int j = 0; j < NAR; ++j) { dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i]; diff --git a/Src/Particle/AMReX_StructOfArrays.H b/Src/Particle/AMReX_StructOfArrays.H index 0ef3b8ae869..6cd498e20a2 100644 --- a/Src/Particle/AMReX_StructOfArrays.H +++ b/Src/Particle/AMReX_StructOfArrays.H @@ -11,9 +11,11 @@ namespace amrex { template class Allocator=DefaultAllocator> + template class Allocator=DefaultAllocator, + bool use64BitIdCpu=false> struct StructOfArrays { + using IdCPU = amrex::PODVector >; 
using RealVector = amrex::PODVector >; using IntVector = amrex::PODVector >; @@ -28,9 +30,12 @@ struct StructOfArrays { [[nodiscard]] int NumIntComps () const noexcept { return NInt + m_runtime_idata.size(); } + [[nodiscard]] IdCPU& GetIdCPUData () { return m_idcpu; } [[nodiscard]] std::array& GetRealData () { return m_rdata; } [[nodiscard]] std::array< IntVector, NInt>& GetIntData () { return m_idata; } + /** Get access to the particle id/cpu Arrays */ + [[nodiscard]] const IdCPU& GetIdCPUData () const { return m_idcpu; } /** Get access to the particle Real Arrays (only compile-time components) */ [[nodiscard]] const std::array& GetRealData () const { return m_rdata; } /** Get access to the particle Int Arrays (only compile-time components) */ @@ -119,7 +124,9 @@ struct StructOfArrays { */ [[nodiscard]] std::size_t size () const { - if constexpr (NReal > 0) { + if constexpr (use64BitIdCpu == true) { + return m_idcpu.size(); + } else if constexpr (NReal > 0) { return m_rdata[0].size(); } else if constexpr (NInt > 0) { return m_idata[0].size(); @@ -175,6 +182,9 @@ struct StructOfArrays { void resize (size_t count) { + if constexpr (use64BitIdCpu == true) { + m_idcpu.resize(count); + } if constexpr (NReal > 0) { for (int i = 0; i < NReal; ++i) { m_rdata[i].resize(count); } } @@ -185,6 +195,15 @@ struct StructOfArrays { for (int i = 0; i < int(m_runtime_idata.size()); ++i) { m_runtime_idata[i].resize(count); } } + [[nodiscard]] IdCPU* idcpuarray () { + if constexpr (use64BitIdCpu == true) { + return m_idcpu.dataPtr(); + } else { + return nullptr; + } + + } + [[nodiscard]] GpuArray realarray () { GpuArray arr; @@ -208,6 +227,7 @@ struct StructOfArrays { int m_num_neighbor_particles{0}; private: + IdCPU m_idcpu; std::array m_rdata; std::array< IntVector, NInt> m_idata; diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 31ca7f8df6a..d3cafc5be58 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -338,9 +338,10 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l else { amrex::ignore_unused(is_checkpoint); // Int: id, cpu - *iptr = soa.GetIntData(0)[pindex]; + uint64_t idcpu = soa.GetIdCPUData()[pindex]; + *iptr = (int) ParticleIDWrapper(idcpu); iptr += 1; - *iptr = soa.GetIntData(1)[pindex]; + *iptr = (int) ParticleCPUWrapper(idcpu); iptr += 1; // Real: position @@ -348,8 +349,8 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l rptr += AMREX_SPACEDIM; } - // extra SoA int data - const int int_start_offset = PC::ParticleType::is_soa_particle ? 2 : 0; // pure SoA: skip id, cpu + // SoA int data + const int int_start_offset = 0; for (int j = int_start_offset; j < pc.NumIntComps(); j++) { if (write_int_comp[PC::NStructInt+j]) { *iptr = soa.GetIntData(j)[pindex]; @@ -1021,14 +1022,15 @@ void WriteBinaryParticleDataAsync (PC const& pc, } else { // Ints: id, cpu - *iptr = soa.GetIntData(0)[pindex]; + uint64_t idcpu = soa.GetIdCPUData()[pindex]; + *iptr = (int) ParticleIDWrapper(idcpu); iptr += 1; - *iptr = soa.GetIntData(1)[pindex]; + *iptr = (int) ParticleCPUWrapper(idcpu); iptr += 1; } // extra SoA Ints - const int int_start_offset = PC::ParticleType::is_soa_particle ? 
2 : 0; // pure SoA: skip id, cpu + const int int_start_offset = 0; for (int j = int_start_offset; j < nic; j++) { if (write_int_comp[NStructInt+j]) diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp index 94715b7d6aa..62da81def81 100644 --- a/Tests/Particles/RedistributeSOA/main.cpp +++ b/Tests/Particles/RedistributeSOA/main.cpp @@ -92,6 +92,7 @@ class TestParticleContainer { const Box& tile_box = mfi.tilebox(); + Gpu::HostVector host_idcpu; std::array, NR> host_real; std::array, NI> host_int; @@ -106,6 +107,10 @@ class TestParticleContainer amrex::Long id = ParticleType::NextID(); + host_idcpu.push_back(0); + ParticleIDWrapper(host_idcpu.back()) = id; + ParticleCPUWrapper(host_idcpu.back()) = ParallelDescriptor::MyProc(); + host_int[0].push_back(static_cast(id)); host_int[1].push_back(ParallelDescriptor::MyProc()); host_real[0].push_back(static_cast (plo[0] + (iv[0] + r[0])*dx[0])); @@ -137,6 +142,13 @@ class TestParticleContainer particle_tile.resize(new_size); auto& soa = particle_tile.GetStructOfArrays(); + { + Gpu::copyAsync(Gpu::hostToDevice, + host_idcpu.begin(), + host_idcpu.end(), + soa.GetIdCPUData().begin() + old_size); + + } for (int i = 0; i < NR; ++i) { Gpu::copyAsync(Gpu::hostToDevice, From ecaa46d0be4b5c79b8806e48e3469000d8bb7252 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 11 Dec 2023 20:38:06 -0800 Subject: [PATCH 061/186] Fix SuperParticle `push_back` (#3661) ## Summary From #3585 commit: fixes a segfault for the legacy particle layout: ``` Thread 1 "python3" received signal SIGSEGV, Segmentation fault. 0x00007ffff5951e5c in amrex::ParticleTile, 2, 1, std::allocator>::push_back<2, 1, 0> (this=0x555555f206e0, sp=...) at /home/axel/src/pyamrex/build/_deps/fetchedamrex-src/Src/Particle/AMReX_ParticleTile.H:916 916 m_soa_tile.GetIdCPUData()[np] = sp.m_idcpu; ``` ## Additional background X-ref: https://github.com/AMReX-Codes/pyamrex/pull/232 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_ParticleTile.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 2b60b37d304..a1bdbdd56e2 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -912,7 +912,7 @@ struct ParticleTile } m_soa_tile.resize(np+1); - if constexpr (!ParticleType::is_soa_particle) { + if constexpr (ParticleType::is_soa_particle) { m_soa_tile.GetIdCPUData()[np] = sp.m_idcpu; } auto& arr_rdata = m_soa_tile.GetRealData(); From 064db4eaa5992788555ebd5e52824b885f8d0340 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 12 Dec 2023 15:48:52 -0800 Subject: [PATCH 062/186] Fix Resize Issue of Fab with the Async Arena (#3663) ## Summary Previously there was an issue with resizing Fabs using The_Async_Arena. The issue was the previous allocation during resize might be done on a different stream. This commit fixes the issue and makes the following patterns work. FArrayBox tmp0(The_Async_Arena()); FArrayBox tmp1(The_Async_Arena()); FArrayBox tmp2; for (MFIter ...) 
{ tmp0.resize(box,ncomp,The_Async_Arena()); tmp1.resize(box,ncomp); tmp2.resize(box,ncomp,The_Async_Arena()); } ## Additional background https://github.com/AMReX-Astro/Castro/pull/2677 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_Arena.H | 5 +++++ Src/Base/AMReX_BaseFab.H | 25 ++++++++++++++++++++++++- Src/Base/AMReX_GpuTypes.H | 1 + Src/Base/AMReX_PArena.H | 5 +++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H index e42ebdc9cd2..d328f693a96 100644 --- a/Src/Base/AMReX_Arena.H +++ b/Src/Base/AMReX_Arena.H @@ -157,6 +157,11 @@ public: */ virtual void registerForProfiling (const std::string& memory_name); +#ifdef AMREX_USE_GPU + //! Is this GPU stream ordered memory allocator? + [[nodiscard]] virtual bool isStreamOrderedArena () const { return false; } +#endif + /** * \brief Given a minimum required arena size of sz bytes, this returns * the next largest arena size that will align to align_size bytes diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index 006d7639ad6..e0331e7a0ae 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -1631,6 +1631,9 @@ protected: Long truesize = 0L; //!< nvar*numpts that was allocated on heap. bool ptr_owner = false; //!< Owner of T*? bool shared_memory = false; //!< Is the memory allocated in shared memory? +#ifdef AMREX_USE_GPU + gpuStream_t alloc_stream{}; +#endif }; template @@ -1902,6 +1905,9 @@ BaseFab::define () this->truesize = this->nvar*this->domain.numPts(); this->ptr_owner = true; this->dptr = static_cast(this->alloc(this->truesize*sizeof(T))); +#ifdef AMREX_USE_GPU + this->alloc_stream = Gpu::gpuStream(); +#endif placementNew(this->dptr, this->truesize); @@ -2003,6 +2009,9 @@ BaseFab::BaseFab (BaseFab&& rhs) noexcept dptr(rhs.dptr), domain(rhs.domain), nvar(rhs.nvar), truesize(rhs.truesize), ptr_owner(rhs.ptr_owner), shared_memory(rhs.shared_memory) +#ifdef AMREX_USE_GPU + , alloc_stream(rhs.alloc_stream) +#endif { rhs.dptr = nullptr; rhs.ptr_owner = false; @@ -2021,6 +2030,9 @@ BaseFab::operator= (BaseFab&& rhs) noexcept truesize = rhs.truesize; ptr_owner = rhs.ptr_owner; shared_memory = rhs.shared_memory; +#ifdef AMREX_USE_GPU + alloc_stream = rhs.alloc_stream; +#endif rhs.dptr = nullptr; rhs.ptr_owner = false; @@ -2062,7 +2074,11 @@ BaseFab::resize (const Box& b, int n, Arena* ar) this->dptr = nullptr; define(); } - else if (this->nvar*this->domain.numPts() > this->truesize) + else if (this->nvar*this->domain.numPts() > this->truesize +#ifdef AMREX_USE_GPU + || (arena()->isStreamOrderedArena() && alloc_stream != Gpu::gpuStream()) +#endif + ) { if (this->shared_memory) { amrex::Abort("BaseFab::resize: BaseFab in shared memory cannot increase size"); @@ -2114,7 +2130,14 @@ BaseFab::clear () noexcept placementDelete(this->dptr, this->truesize); +#ifdef AMREX_USE_GPU + auto current_stream = Gpu::Device::gpuStream(); + Gpu::Device::setStream(alloc_stream); +#endif this->free(this->dptr); +#ifdef AMREX_USE_GPU + Gpu::Device::setStream(current_stream); +#endif if (this->nvar > 1) { amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T)); diff --git a/Src/Base/AMReX_GpuTypes.H b/Src/Base/AMReX_GpuTypes.H index 
8b5680b41b8..ecb992983ba 100644 --- a/Src/Base/AMReX_GpuTypes.H +++ b/Src/Base/AMReX_GpuTypes.H @@ -29,6 +29,7 @@ struct Dim1 { struct gpuStream_t { sycl::queue* queue = nullptr; bool operator== (gpuStream_t const& rhs) noexcept { return queue == rhs.queue; } + bool operator!= (gpuStream_t const& rhs) noexcept { return queue != rhs.queue; } }; #endif diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H index cc221ba7bae..d2e8c8ebec3 100644 --- a/Src/Base/AMReX_PArena.H +++ b/Src/Base/AMReX_PArena.H @@ -38,6 +38,11 @@ public: [[nodiscard]] bool isDevice () const final; [[nodiscard]] bool isPinned () const final; +#ifdef AMREX_USE_GPU + //! Is this CUDA stream ordered memory allocator? + [[nodiscard]] bool isStreamOrderedArena () const final { return true; } +#endif + #ifdef AMREX_CUDA_GE_11_2 private: cudaMemPool_t m_pool; From cc4c4ffeb28c1db62768eebe68ac81f34c0112f2 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 13 Dec 2023 10:05:00 -0800 Subject: [PATCH 063/186] Fix warnings in SortParticlesForDeposition (#3664) --- Src/Particle/AMReX_ParticleUtil.H | 1 + 1 file changed, 1 insertion(+) diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 5430cd34037..03fe2a29d65 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -675,6 +675,7 @@ void PermutationForDeposition (Gpu::DeviceVector& perm, index_type n } }); #else + amrex::ignore_unused(pperm, pglobal_idx); Abort("Not implemented"); #endif From b903e8896afbb202cf5dca786ba75b0cb598ad02 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 13 Dec 2023 11:53:12 -0800 Subject: [PATCH 064/186] Documentation for Profiling: Hot Spots and Load Balance (#3622) Add more documentation on identifying hot spots and load imbalance in profiling results. --------- Co-authored-by: Andrew Myers --- .../source/AMReX_Profiling_Tools.rst | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst b/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst index 8726f51a2ba..cdd774488e4 100644 --- a/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst +++ b/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst @@ -93,6 +93,47 @@ it is also recommended to wrap any ``BL_PROFILE_TINY_FLUSH();`` calls in informative ``amrex::Print()`` lines to ensure accurate identification of each set of timers. +Hot Spots and Load Balance +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The output of TinyProfiler can help us to identify hot spots. For example, +the following output shows the top three hot spots of a linear solver test +running on 4 MPI processes. + +.. highlight:: console + +:: + + -------------------------------------------------------------------------------------------- + Name NCalls Excl. Min Excl. Avg Excl. Max Max % + -------------------------------------------------------------------------------------------- + MLPoisson::Fsmooth() 560 0.4775 0.4793 0.4815 34.97% + MLPoisson::Fapply() 114 0.1103 0.113 0.1167 8.48% + FabArray::Xpay() 109 0.1 0.1013 0.1038 7.54% + +In this test, there are 16 boxes evenly distributed among 4 MPI processes. The +output above shows that the load is perfectly balanced. However, if the load +is not balanced, the results can be very different and sometimes +misleading. For example, if we put 2, 2, 6 and 6 boxes on processes 0, 1, 2 +and 3, respectively, the top three hot spots now include two MPI +communication functions, ``FillBoundary`` and ``ParallelCopy``. + +.. 
highlight:: console + +:: + + -------------------------------------------------------------------------------------------- + Name NCalls Excl. Min Excl. Avg Excl. Max Max % + -------------------------------------------------------------------------------------------- + FillBoundary_finish() 607 0.01568 0.3367 0.6574 41.97% + MLPoisson::Fsmooth() 560 0.2133 0.4047 0.5973 38.13% + FabArray::ParallelCopy_finish() 231 0.002977 0.09748 0.1895 12.10% + +The reason that the MPI communication appears slow is that the lightly +loaded processes have to wait for messages sent by the heavily loaded +processes. See also :ref:`sec:profopts` for a diagnostic option that may +provide more insight on the load imbalance. + .. _sec:full:profiling: Full Profiling From d988c985950bc0e6733db672a62cd19b8c046c9a Mon Sep 17 00:00:00 2001 From: Edward Basso Date: Thu, 14 Dec 2023 09:45:24 -0800 Subject: [PATCH 065/186] Delete empty below comments on classes and functions (#3669) Deleting the empty lines between the Doxygen comments and the class declarations makes Intellisense actually recognize the connections. --- Src/Amr/AMReX_Amr.H | 1 - Src/Amr/AMReX_AmrLevel.H | 1 - Src/Amr/AMReX_Derive.H | 2 -- Src/Amr/AMReX_LevelBld.H | 1 - Src/Amr/AMReX_StateData.H | 1 - Src/Amr/AMReX_StateDescriptor.H | 2 -- Src/AmrCore/AMReX_AmrCore.H | 1 - Src/AmrCore/AMReX_Cluster.H | 2 -- Src/AmrCore/AMReX_ErrorList.H | 2 -- Src/AmrCore/AMReX_FillPatcher.H | 1 - Src/AmrCore/AMReX_FluxRegister.H | 1 - Src/AmrCore/AMReX_InterpFaceRegister.H | 1 - Src/AmrCore/AMReX_Interpolater.H | 11 ------- Src/AmrCore/AMReX_TagBox.H | 2 -- Src/Base/AMReX_Arena.H | 1 - Src/Base/AMReX_BArena.H | 1 - Src/Base/AMReX_BCRec.H | 5 ++-- Src/Base/AMReX_BaseFab.H | 1 - Src/Base/AMReX_BoxDomain.H | 8 ++--- Src/Base/AMReX_BoxList.H | 1 - Src/Base/AMReX_CArena.H | 1 - Src/Base/AMReX_CoordSys.H | 1 - Src/Base/AMReX_DistributionMapping.H | 1 - Src/Base/AMReX_FACopyDescriptor.H | 1 - Src/Base/AMReX_FPC.H | 1 - Src/Base/AMReX_Geometry.H | 16 +++++----- Src/Base/AMReX_IArrayBox.H | 1 - Src/Base/AMReX_IndexType.H | 1 - Src/Base/AMReX_IntVect.H | 1 - Src/Base/AMReX_MultiFabUtil.H | 30 +++++++++---------- Src/Base/AMReX_NFiles.H | 1 - Src/Base/AMReX_Orientation.H | 1 - Src/Base/AMReX_PArena.H | 1 - Src/Base/AMReX_ParmParse.H | 1 - Src/Base/AMReX_ParmParse.cpp | 1 - Src/Base/AMReX_RealVect.H | 1 - Src/Base/AMReX_RungeKutta.H | 3 +- Src/Base/AMReX_Vector.H | 1 - Src/Base/AMReX_VisMF.H | 1 - Src/Boundary/AMReX_BoundCond.H | 1 - Src/Boundary/AMReX_FabSet.H | 1 - Src/Boundary/AMReX_Mask.H | 1 - Src/Boundary/AMReX_YAFluxRegister.H | 1 - Src/EB/AMReX_EBFluxRegister.H | 1 - Src/Extern/Bittree/AMReX_Bittree.H | 1 - .../SUNDIALS/AMReX_NVector_MultiFab.cpp | 3 -- Src/Extern/SUNDIALS/AMReX_SUNMemory.H | 1 - Src/Extern/SUNDIALS/AMReX_Sundials_Core.H | 3 -- Src/Particle/AMReX_ParticleTransformation.H | 2 -- Src/Particle/AMReX_ParticleUtil.H | 1 - 50 files changed, 29 insertions(+), 98 deletions(-) diff --git a/Src/Amr/AMReX_Amr.H b/Src/Amr/AMReX_Amr.H index a7173fd105a..bb18ec9d160 100644 --- a/Src/Amr/AMReX_Amr.H +++ b/Src/Amr/AMReX_Amr.H @@ -30,7 +30,6 @@ class AmrInSituBridge; * not belong on a single level, like establishing and updating the hierarchy * of levels, global timestepping, and managing the different AmrLevels */ - class Amr : public AmrCore { diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H index d4ac6c7c70d..8abb00b5475 100644 --- a/Src/Amr/AMReX_AmrLevel.H +++ b/Src/Amr/AMReX_AmrLevel.H @@ -34,7 +34,6 @@ class TagBoxArray; * AmrLevel functions both as 
a container for state data on a level * and also manages the advancement of data in time. */ - class AmrLevel { friend class Amr; diff --git a/Src/Amr/AMReX_Derive.H b/Src/Amr/AMReX_Derive.H index e1a7310a7b2..1e0cceb7894 100644 --- a/Src/Amr/AMReX_Derive.H +++ b/Src/Amr/AMReX_Derive.H @@ -100,7 +100,6 @@ class DescriptorList; * from the state data contained in AmrLevel and its derivatives. Some * examples might be kinetic energy, vorticity, concentration gradients ... */ - class DeriveRec { friend class DeriveList; @@ -339,7 +338,6 @@ private: * * DeriveList manages and provides access to the list of DeriveRecs. */ - class DeriveList { public: diff --git a/Src/Amr/AMReX_LevelBld.H b/Src/Amr/AMReX_LevelBld.H index 8b421265bfc..bb79184ca5e 100644 --- a/Src/Amr/AMReX_LevelBld.H +++ b/Src/Amr/AMReX_LevelBld.H @@ -18,7 +18,6 @@ namespace amrex { * Abstract base class specifying an interface for building problem-specific * AmrLevels. */ - class LevelBld { public: diff --git a/Src/Amr/AMReX_StateData.H b/Src/Amr/AMReX_StateData.H index 251e6482a45..e6edb486c41 100644 --- a/Src/Amr/AMReX_StateData.H +++ b/Src/Amr/AMReX_StateData.H @@ -29,7 +29,6 @@ class StateDataPhysBCFunct; * * StateData holds state data on a level for the current and previous time step. */ - class StateData { friend class StateDataPhysBCFunct; diff --git a/Src/Amr/AMReX_StateDescriptor.H b/Src/Amr/AMReX_StateDescriptor.H index 6cd6c92cdd3..2830b955705 100644 --- a/Src/Amr/AMReX_StateDescriptor.H +++ b/Src/Amr/AMReX_StateDescriptor.H @@ -29,7 +29,6 @@ namespace amrex { /** * \brief Attributes of StateData. */ - class StateDescriptor { friend class DescriptorList; @@ -434,7 +433,6 @@ private: * * A container class for StateDescriptors. */ - class DescriptorList { public: diff --git a/Src/AmrCore/AMReX_AmrCore.H b/Src/AmrCore/AMReX_AmrCore.H index 20428b40930..2969b986a75 100644 --- a/Src/AmrCore/AMReX_AmrCore.H +++ b/Src/AmrCore/AMReX_AmrCore.H @@ -20,7 +20,6 @@ class AmrParGDB; * virtual functions to allocate, initialize and delete data. It also * requires the derived class to tag cells for refinement. */ - class AmrCore : public AmrMesh { diff --git a/Src/AmrCore/AMReX_Cluster.H b/Src/AmrCore/AMReX_Cluster.H index 5bbf5c796bc..7d60131e6cf 100644 --- a/Src/AmrCore/AMReX_Cluster.H +++ b/Src/AmrCore/AMReX_Cluster.H @@ -20,7 +20,6 @@ class ClusterList; * * Utility class for tagging error cells. */ - class Cluster { public: @@ -138,7 +137,6 @@ private: * * A container class for Cluster. */ - class ClusterList { public: diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H index 9ab1a978966..ab4395d8ce8 100644 --- a/Src/AmrCore/AMReX_ErrorList.H +++ b/Src/AmrCore/AMReX_ErrorList.H @@ -102,7 +102,6 @@ extern "C" * actual error tagging will be through derivation, so provision is made * for this as well. */ - class ErrorRec { public: @@ -348,7 +347,6 @@ private: * * Container class for ErrorRecs. */ - class ErrorList { public: diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H index d36b3529efd..5ff1c9550d3 100644 --- a/Src/AmrCore/AMReX_FillPatcher.H +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -68,7 +68,6 @@ namespace amrex { * See AmrLevel::RK for an example of using the RungeKutta functions and * FillPatcher together. 
*/ - template class FillPatcher { diff --git a/Src/AmrCore/AMReX_FluxRegister.H b/Src/AmrCore/AMReX_FluxRegister.H index 4178eb289ba..f5983e18872 100644 --- a/Src/AmrCore/AMReX_FluxRegister.H +++ b/Src/AmrCore/AMReX_FluxRegister.H @@ -14,7 +14,6 @@ namespace amrex { * * Stores and manipulates fluxes at coarse-fine interfaces. */ - class FluxRegister : public BndryRegister diff --git a/Src/AmrCore/AMReX_InterpFaceRegister.H b/Src/AmrCore/AMReX_InterpFaceRegister.H index a63c2c23e4c..c54879bcaf6 100644 --- a/Src/AmrCore/AMReX_InterpFaceRegister.H +++ b/Src/AmrCore/AMReX_InterpFaceRegister.H @@ -12,7 +12,6 @@ namespace amrex { * \brief InterpFaceRegister is a coarse/fine boundary register for * interpolation of face data at the coarse/fine boundary. */ - class InterpFaceRegister { public: diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index d2fe66b0cbd..e1210a83329 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -17,7 +17,6 @@ class IArrayBox; * * Specifies interpolater interface for coarse-to-fine interpolation in space. */ - class Interpolater : public InterpBase { @@ -160,7 +159,6 @@ public: * * Bilinear interpolation on node centered data. */ - class NodeBilinear : public Interpolater @@ -219,7 +217,6 @@ public: * * Bilinear interpolation on cell centered data. */ - class CellBilinear : public Interpolater @@ -286,7 +283,6 @@ public: * sum_ivar a(ic,jc,ivar)*fab(if,jf,ivar) = 0 is satisfied * in all fine cells if,jf covering coarse cell ic,jc. */ - class CellConservativeLinear : public Interpolater @@ -344,7 +340,6 @@ protected: * Linear conservative interpolation on cell centered data * but with protection against undershoots or overshoots. */ - class CellConservativeProtected : public CellConservativeLinear @@ -393,7 +388,6 @@ public: * * Quadratic interpolation on cell centered data. */ - class CellQuadratic : public Interpolater @@ -451,7 +445,6 @@ public: /** * \brief Piecewise Constant interpolation on cell centered data. */ - class PCInterp : public Interpolater @@ -512,7 +505,6 @@ public: * in constructing the polynomial, the average of the polynomial inside that * cell is equal to the cell averaged value of the original data. */ - class CellConservativeQuartic : public Interpolater @@ -574,7 +566,6 @@ public: * a given coarse cell will have the same divergence, even when the coarse * grid divergence is spatially varying. */ - class FaceDivFree : public Interpolater @@ -667,7 +658,6 @@ public: * * Bilinear interpolation on data. */ - class FaceLinear : public Interpolater @@ -789,7 +779,6 @@ public: * * Quartic interpolation on cell centered data. */ - class CellQuartic : public Interpolater diff --git a/Src/AmrCore/AMReX_TagBox.H b/Src/AmrCore/AMReX_TagBox.H index 929e181e0e8..3d26f76e9cb 100644 --- a/Src/AmrCore/AMReX_TagBox.H +++ b/Src/AmrCore/AMReX_TagBox.H @@ -20,7 +20,6 @@ namespace amrex { * * This class is used to tag cells in a Box that need addition refinement. */ - class TagBox final : public BaseFab @@ -145,7 +144,6 @@ public: * * A container class for TagBoxes. */ - class TagBoxArray : public FabArray diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H index d328f693a96..b93c476f86b 100644 --- a/Src/Base/AMReX_Arena.H +++ b/Src/Base/AMReX_Arena.H @@ -82,7 +82,6 @@ struct ArenaInfo * A virtual base class for objects that manage their own dynamic * memory allocation. 
*/ - class Arena { public: diff --git a/Src/Base/AMReX_BArena.H b/Src/Base/AMReX_BArena.H index 9a3b4aa0f1b..d587d100859 100644 --- a/Src/Base/AMReX_BArena.H +++ b/Src/Base/AMReX_BArena.H @@ -11,7 +11,6 @@ namespace amrex { * This is the simplest dynamic memory management class derived from Arena. * Makes calls to std::malloc and std::free. */ - class BArena : public Arena diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H index 268147a3a04..d23da777eda 100644 --- a/Src/Base/AMReX_BCRec.H +++ b/Src/Base/AMReX_BCRec.H @@ -10,10 +10,9 @@ namespace amrex { /** * \brief Boundary Condition Records. * Necessary information and functions for computing boundary conditions. +* +* This class has standard layout. And we should keep it so! */ - -// This class has standard layout. And we should keep it so! - class BCRec { public: diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index e0331e7a0ae..eb8e5c59615 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -3528,7 +3528,6 @@ BaseFab::protected_divide (const BaseFab& src, const Box& srcbox, const Bo * and stored in component comp of this FAB. * This fab is returned as a reference for chaining. */ - template template BaseFab& diff --git a/Src/Base/AMReX_BoxDomain.H b/Src/Base/AMReX_BoxDomain.H index a82e5ddc72e..af92d631607 100644 --- a/Src/Base/AMReX_BoxDomain.H +++ b/Src/Base/AMReX_BoxDomain.H @@ -55,14 +55,12 @@ std::ostream& operator<< (std::ostream& os, const BoxDomain& bd); /** * \brief A List of Disjoint Boxes. +* * A BoxDomain is a BoxList with the restriction that Boxes in the list * are disjoint. +* Note that a BoxDomain is NOT a BoxList due to the protected inheritance. +* This is a concrete class, not a polymorphic one. */ - -//Note that a BoxDomain is NOT a BoxList due to the protected inheritance. -//This is a concrete class, not a polymorphic one. - - class BoxDomain : protected BoxList diff --git a/Src/Base/AMReX_BoxList.H b/Src/Base/AMReX_BoxList.H index c0ff30025ff..cab414d36c8 100644 --- a/Src/Base/AMReX_BoxList.H +++ b/Src/Base/AMReX_BoxList.H @@ -48,7 +48,6 @@ namespace amrex * IndexType. This class implements operations for sets of Boxes. * This is a concrete class, not a polymorphic one. */ - class BoxList { public: diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H index 163039df2ef..9547bc92f21 100644 --- a/Src/Base/AMReX_CArena.H +++ b/Src/Base/AMReX_CArena.H @@ -24,7 +24,6 @@ struct MemStat; * chunks of heap space and apportions it out as requested. It merges * together neighboring chunks on each free(). */ - class CArena : public Arena diff --git a/Src/Base/AMReX_CoordSys.H b/Src/Base/AMReX_CoordSys.H index ab946ffa3d0..24096c6f428 100644 --- a/Src/Base/AMReX_CoordSys.H +++ b/Src/Base/AMReX_CoordSys.H @@ -20,7 +20,6 @@ class FArrayBox; * * Routines for mapping between physical coordinate system and index space. */ - class CoordSys { public: diff --git a/Src/Base/AMReX_DistributionMapping.H b/Src/Base/AMReX_DistributionMapping.H index 0707532a0fc..e9aa82f16a2 100644 --- a/Src/Base/AMReX_DistributionMapping.H +++ b/Src/Base/AMReX_DistributionMapping.H @@ -37,7 +37,6 @@ class FabArrayBase; * BoxArray are as equal across CPUs as is possible. The SFC distribution is * based on a space filling curve. 
*/ - class DistributionMapping { public: diff --git a/Src/Base/AMReX_FACopyDescriptor.H b/Src/Base/AMReX_FACopyDescriptor.H index 7e1e383d237..ca7d3f47029 100644 --- a/Src/Base/AMReX_FACopyDescriptor.H +++ b/Src/Base/AMReX_FACopyDescriptor.H @@ -103,7 +103,6 @@ FabCopyDescriptor::~FabCopyDescriptor () * \brief This class orchestrates filling a destination fab of size destFabBox * from fabarray on the local processor (myProc). */ - template class FabArrayCopyDescriptor { diff --git a/Src/Base/AMReX_FPC.H b/Src/Base/AMReX_FPC.H index 77c4dfa923b..8975ed8e9b9 100644 --- a/Src/Base/AMReX_FPC.H +++ b/Src/Base/AMReX_FPC.H @@ -15,7 +15,6 @@ namespace amrex { * namespaces, and we don't like global constants, we make them static * constant data members of this class. */ - class FPC { public: diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 4017273151a..550b42f2f6a 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -16,14 +16,6 @@ #include namespace amrex { -/** -* \class Geometry -* \brief Rectangular problem domain geometry. -* -* This class describes problem domain and coordinate system for -* RECTANGULAR problem domains. Since the problem domain is RECTANGULAR, -* periodicity is meaningful. -*/ class MultiFab; class DistributionMapping; @@ -67,6 +59,14 @@ public: int coord; }; +/** + * \class Geometry + * \brief Rectangular problem domain geometry. + * + * This class describes problem domain and coordinate system for + * RECTANGULAR problem domains. Since the problem domain is RECTANGULAR, + * periodicity is meaningful. + */ class Geometry : public CoordSys diff --git a/Src/Base/AMReX_IArrayBox.H b/Src/Base/AMReX_IArrayBox.H index b5240395f02..db0f26d5080 100644 --- a/Src/Base/AMReX_IArrayBox.H +++ b/Src/Base/AMReX_IArrayBox.H @@ -41,7 +41,6 @@ public: * This class does NOT provide a copy constructor or assignment operator. */ - class IArrayBox : public BaseFab diff --git a/Src/Base/AMReX_IndexType.H b/Src/Base/AMReX_IndexType.H index 02a56aae2a2..0fd613d2a99 100644 --- a/Src/Base/AMReX_IndexType.H +++ b/Src/Base/AMReX_IndexType.H @@ -19,7 +19,6 @@ namespace amrex { * enumerated type CellIndex to be either CELL or NODE; i.e. each of the * AMREX_SPACEDIM dimensions must be either CELL or NODE. */ - class IndexType { friend MPI_Datatype ParallelDescriptor::Mpi_typemap::type(); diff --git a/Src/Base/AMReX_IntVect.H b/Src/Base/AMReX_IntVect.H index fd71c93ae87..b2658a5ec94 100644 --- a/Src/Base/AMReX_IntVect.H +++ b/Src/Base/AMReX_IntVect.H @@ -42,7 +42,6 @@ int coarsen (int i, int ratio) noexcept * C++ array. In addition, the basic arithmetic operators have been overloaded * to implement scaling and translation operations. */ - class IntVect { friend MPI_Datatype ParallelDescriptor::Mpi_typemap::type(); diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index 29af89ba88e..ca9b1ab7fff 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -637,13 +637,13 @@ void average_down (const FabArray& S_fine, FabArray& S_crse, - /** - * \brief Returns part of a norm based on two MultiFabs - * The MultiFabs MUST have the same underlying BoxArray. - * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n)) - * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n) - */ - +/** + * \brief Returns part of a norm based on two MultiFabs. + * + * The MultiFabs MUST have the same underlying BoxArray. 
+ * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n)) + * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n) + */ template Real NormHelper (const MultiFab& x, int xcomp, @@ -696,14 +696,14 @@ NormHelper (const MultiFab& x, int xcomp, return sm; } - /** - * \brief Returns part of a norm based on three MultiFabs - * The MultiFabs MUST have the same underlying BoxArray. - * The Predicate pf is used to test the mask - * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n)) - * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n) - */ - +/** + * \brief Returns part of a norm based on three MultiFabs + * + * The MultiFabs MUST have the same underlying BoxArray. + * The Predicate pf is used to test the mask + * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n)) + * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n) + */ template Real NormHelper (const MMF& mask, diff --git a/Src/Base/AMReX_NFiles.H b/Src/Base/AMReX_NFiles.H index 824351b50dc..bd1518dd44c 100644 --- a/Src/Base/AMReX_NFiles.H +++ b/Src/Base/AMReX_NFiles.H @@ -23,7 +23,6 @@ namespace amrex { * nfi.Stream().write((const char *) data.dataPtr(), nChars); * } */ - class NFilesIter { public: diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H index 263bb84a95e..61e3622b336 100644 --- a/Src/Base/AMReX_Orientation.H +++ b/Src/Base/AMReX_Orientation.H @@ -25,7 +25,6 @@ class OrientationIter; * AMREX_SPACEDIM-1 and then the AMREX_SPACEDIM high sides from direction 0 .. * AMREX_SPACEDIM-1. */ - class Orientation { public: diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H index d2e8c8ebec3..75db747fd9f 100644 --- a/Src/Base/AMReX_PArena.H +++ b/Src/Base/AMReX_PArena.H @@ -15,7 +15,6 @@ namespace amrex { * \brief This arena uses CUDA stream-ordered memory allocator if available. * If not, use The_Arena(). */ - class PArena : public Arena diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index 01a0098333e..b6f4799f2e8 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -267,7 +267,6 @@ class IntVect; * #endif * */ - class ParmParse { public: diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp index c2ecfc7b37a..6fe442bfc5b 100644 --- a/Src/Base/AMReX_ParmParse.cpp +++ b/Src/Base/AMReX_ParmParse.cpp @@ -450,7 +450,6 @@ ppfound (const std::string& keyword, // except if n==-1, return the index of the last occurrence. // Return 0 if the specified occurrence does not exist. // - const ParmParse::PP_entry* ppindex (const ParmParse::Table& table, int n, diff --git a/Src/Base/AMReX_RealVect.H b/Src/Base/AMReX_RealVect.H index 635d21927f6..9e1d72700f7 100644 --- a/Src/Base/AMReX_RealVect.H +++ b/Src/Base/AMReX_RealVect.H @@ -28,7 +28,6 @@ namespace amrex C++ array. In addition, the basic arithmetic operators have been overloaded to implement scaling and translation operations. */ - class RealVect { public: diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H index cfac0851cab..d68bf00bfb4 100644 --- a/Src/Base/AMReX_RungeKutta.H +++ b/Src/Base/AMReX_RungeKutta.H @@ -4,8 +4,6 @@ #include -namespace amrex::RungeKutta { - /** * \brief Functions for Runge-Kutta methods * @@ -48,6 +46,7 @@ namespace amrex::RungeKutta { * FillPatcher class can be useful for implementing such a callable. See * AmrLevel::RK for an example. 
*/ +namespace amrex::RungeKutta { struct PostStageNoOp { template diff --git a/Src/Base/AMReX_Vector.H b/Src/Base/AMReX_Vector.H index c377076fe1b..18e14d5c3c0 100644 --- a/Src/Base/AMReX_Vector.H +++ b/Src/Base/AMReX_Vector.H @@ -20,7 +20,6 @@ namespace amrex { * Vector::operator[] provides bound checking when compiled with * DEBUG=TRUE. */ - template > class Vector : diff --git a/Src/Base/AMReX_VisMF.H b/Src/Base/AMReX_VisMF.H index f0b146f6a9d..468523e0039 100644 --- a/Src/Base/AMReX_VisMF.H +++ b/Src/Base/AMReX_VisMF.H @@ -29,7 +29,6 @@ class IArrayBox; * \brief File I/O for FabArray. * Wrapper class for reading/writing FabArray objects to disk in various "smart" ways. */ - class VisMF : public VisMFBuffer { diff --git a/Src/Boundary/AMReX_BoundCond.H b/Src/Boundary/AMReX_BoundCond.H index 834f790f6b5..963a2fa7ecc 100644 --- a/Src/Boundary/AMReX_BoundCond.H +++ b/Src/Boundary/AMReX_BoundCond.H @@ -16,7 +16,6 @@ namespace amrex { boundary conditions are specified via an integer identifier. This class maintains that integer. */ - class BoundCond { public: diff --git a/Src/Boundary/AMReX_FabSet.H b/Src/Boundary/AMReX_FabSet.H index f4ae8b7d247..9841555b336 100644 --- a/Src/Boundary/AMReX_FabSet.H +++ b/Src/Boundary/AMReX_FabSet.H @@ -40,7 +40,6 @@ namespace amrex { FabSets are used primarily as a data storage mechanism, and are manipulated by more sophisticated control classes. */ - template class FabSetT { diff --git a/Src/Boundary/AMReX_Mask.H b/Src/Boundary/AMReX_Mask.H index 3a41ea81913..02000250f49 100644 --- a/Src/Boundary/AMReX_Mask.H +++ b/Src/Boundary/AMReX_Mask.H @@ -22,7 +22,6 @@ namespace amrex { This class does NOT provide a copy constructor or assignment operator. */ - class Mask final : public BaseFab diff --git a/Src/Boundary/AMReX_YAFluxRegister.H b/Src/Boundary/AMReX_YAFluxRegister.H index 075a630a2f7..e26426ce15b 100644 --- a/Src/Boundary/AMReX_YAFluxRegister.H +++ b/Src/Boundary/AMReX_YAFluxRegister.H @@ -23,7 +23,6 @@ namespace amrex { `Reflux` is called to update the coarse cells next to the coarse/fine boundary. */ - template class YAFluxRegisterT { diff --git a/Src/EB/AMReX_EBFluxRegister.H b/Src/EB/AMReX_EBFluxRegister.H index 33ec811dcf6..72fec3b6a7c 100644 --- a/Src/EB/AMReX_EBFluxRegister.H +++ b/Src/EB/AMReX_EBFluxRegister.H @@ -53,7 +53,6 @@ namespace amrex { to add the part in ghost cells (excluding ghost cells covered by valid cells of other grids) to EBFluxRegister's internal data. 
*/ - class EBFluxRegister : public YAFluxRegister { diff --git a/Src/Extern/Bittree/AMReX_Bittree.H b/Src/Extern/Bittree/AMReX_Bittree.H index 54a046be720..feb05e9f189 100644 --- a/Src/Extern/Bittree/AMReX_Bittree.H +++ b/Src/Extern/Bittree/AMReX_Bittree.H @@ -18,7 +18,6 @@ LIBRARIES += -lbittree Include in inputs: amr.use_bittree = true */ - class btUnit { // Functions used in AmrMesh public: diff --git a/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp b/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp index 8408f75c41d..34671fac1a8 100644 --- a/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp +++ b/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp @@ -24,7 +24,6 @@ namespace amrex::sundials { /* ---------------------------------------------------------------------------- * Function to create a new empty multifab vector */ - N_Vector N_VNewEmpty_MultiFab(sunindextype length, ::sundials::Context* sunctx) { /* Create vector */ @@ -76,7 +75,6 @@ N_Vector N_VNewEmpty_MultiFab(sunindextype length, ::sundials::Context* sunctx) /* ---------------------------------------------------------------------------- * Function to create a new MultiFab vector */ - N_Vector N_VNew_MultiFab(sunindextype length, const amrex::BoxArray &ba, const amrex::DistributionMapping &dm, @@ -102,7 +100,6 @@ N_Vector N_VNew_MultiFab(sunindextype length, /* ---------------------------------------------------------------------------- * Function to create a MultiFab N_Vector with user-specific MultiFab */ - N_Vector N_VMake_MultiFab(sunindextype length, amrex::MultiFab *v_mf, ::sundials::Context* sunctx) { diff --git a/Src/Extern/SUNDIALS/AMReX_SUNMemory.H b/Src/Extern/SUNDIALS/AMReX_SUNMemory.H index f7700ce4210..5fc01c3b6b9 100644 --- a/Src/Extern/SUNDIALS/AMReX_SUNMemory.H +++ b/Src/Extern/SUNDIALS/AMReX_SUNMemory.H @@ -13,7 +13,6 @@ namespace amrex::sundials { * * This class allows SUNDIALS to allocate memory using the amrex::Arena. */ - class MemoryHelper { public: MemoryHelper(::sundials::Context* sunctx); diff --git a/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H b/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H index bb3695d19ac..090a5f43534 100644 --- a/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H +++ b/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H @@ -15,7 +15,6 @@ namespace amrex::sundials { * This will create the nthreads SUNDIALS context objects that are needed by * the SUNDIALS solver and vector objects. Called by amrex::Initialize. */ - void Initialize(int nthreads); /** @@ -23,7 +22,6 @@ void Initialize(int nthreads); * * Called by amrex::Finalize. */ - void Finalize(); /** @@ -33,7 +31,6 @@ void Finalize(); * * A SUNDIALS context should not be used concurrently from different threads. 
*/ - ::sundials::Context* The_Sundials_Context(int i = amrex::OpenMP::get_thread_num()); } diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index aa737455ce6..7ca26cef064 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -608,10 +608,8 @@ int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& * \param p predicate function - particles will be copied if p returns true * \param src_start the offset at which to start reading particles from src * \param dst_start the offset at which to start writing particles to dst - * \param n the number of particles to apply the operation to * */ - template >,Index> nvccfoo = 0> Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f, diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 03fe2a29d65..182802e51f2 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -47,7 +47,6 @@ numParticlesOutOfRange (Iterator const& pti, int nGrow) * \param nGrow the number of grow cells allowed. * */ - template ::value && !Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> int numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) From 0a208bbc4635abacd9f087b62aa3cdffc25db113 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 14 Dec 2023 11:04:37 -0800 Subject: [PATCH 066/186] Clang-Tidy in CI: Keep Going after Errors (#3667) Add `-k` to the make command running clang-tidy. With that, the jobs will keep going and show all the clang-tidy check errors instead of stopping on the first error. --- .github/workflows/bittree.yml | 4 ++-- .github/workflows/clang.yml | 6 +++--- .github/workflows/gcc.yml | 26 +++++++++++++------------- .github/workflows/hypre.yml | 4 ++-- .github/workflows/petsc.yml | 2 +- .github/workflows/smoke.yml | 2 +- .github/workflows/sundials.yml | 2 +- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/bittree.yml b/.github/workflows/bittree.yml index c12fbedc58f..687bf07c00d 100644 --- a/.github/workflows/bittree.yml +++ b/.github/workflows/bittree.yml @@ -52,7 +52,7 @@ jobs: mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs_bittree amr.plot_int=1000 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-15 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -104,7 +104,7 @@ jobs: mpiexec -n 2 ./main3d.gnu.TEST.MPI.ex inputs_bittree max_step=10 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-15 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index ec469bb5de6..a343832b510 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -59,7 +59,7 @@ jobs: make test_install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -117,7 +117,7 @@ jobs: 
make -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -159,7 +159,7 @@ jobs: make install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index afc2044bdd7..aca7a9c8723 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -55,7 +55,7 @@ jobs: make test_install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -107,7 +107,7 @@ jobs: cmake --build build -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -155,7 +155,7 @@ jobs: cmake --build build -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -204,7 +204,7 @@ jobs: cmake --build build -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -263,7 +263,7 @@ jobs: # Let's not use clang-tidy for this test because it wants to use C++20. 
# ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - # make -j2 -f clang-tidy-ccache-misses.mak \ + # make -j2 -k -f clang-tidy-ccache-misses.mak \ # CLANG_TIDY=clang-tidy-12 \ # CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -320,7 +320,7 @@ jobs: make -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -384,7 +384,7 @@ jobs: make -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -426,7 +426,7 @@ jobs: make install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -466,7 +466,7 @@ jobs: make install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-15 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -506,7 +506,7 @@ jobs: make install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -546,7 +546,7 @@ jobs: make install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -585,7 +585,7 @@ jobs: CCACHE=ccache ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" @@ -630,7 +630,7 @@ jobs: make -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-12 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" diff --git a/.github/workflows/hypre.yml b/.github/workflows/hypre.yml index 50423f3942a..871224fc79c 100644 --- a/.github/workflows/hypre.yml +++ b/.github/workflows/hypre.yml @@ -100,7 +100,7 @@ jobs: mpiexec -n 2 ./main3d.gnu.MPI.ex inputs.hypre ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy 
--warnings-as-errors=*" @@ -148,7 +148,7 @@ jobs: mpiexec -n 2 ./main2d.gnu.MPI.ex inputs.2d ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" diff --git a/.github/workflows/petsc.yml b/.github/workflows/petsc.yml index 6d0b92b1343..eaddf1c2489 100644 --- a/.github/workflows/petsc.yml +++ b/.github/workflows/petsc.yml @@ -50,7 +50,7 @@ jobs: mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs.rt.2d.petsc ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 080a17fd984..d907b485261 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -47,7 +47,7 @@ jobs: make test_install ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-15 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" diff --git a/.github/workflows/sundials.yml b/.github/workflows/sundials.yml index 12dfd42c159..a890e10fad3 100644 --- a/.github/workflows/sundials.yml +++ b/.github/workflows/sundials.yml @@ -60,7 +60,7 @@ jobs: cmake --build build -j 2 ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt - make -j2 -f clang-tidy-ccache-misses.mak \ + make -j2 -k -f clang-tidy-ccache-misses.mak \ CLANG_TIDY=clang-tidy-14 \ CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*" From 0c6f2b4ad81178a6e14f26ebb5f2ad6c642c785c Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 14 Dec 2023 14:55:44 -0800 Subject: [PATCH 067/186] CMake: AMReX_PARALLEL_LINK_JOBS (#3628) Add AMReX_PARALLEL_LINK_JOBS option to the CMake build system and use it for SYCL builds. Adjust SYCL's RelWithDebInfo mode to the recommendation of Intel VTune. This is a follow-up on #3498 that made similar changes to GNU Make. 
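For illustration, a SYCL build might be configured along these lines (a sketch only, mirroring the CI changes below; the compilers and job count are placeholders to adjust per machine):

```bash
# Hypothetical configure step; -DAMReX_PARALLEL_LINK_JOBS controls the number
# of parallel device-link jobs (values mirror the CI workflow updated below).
cmake -S . -B build \
    -DAMReX_GPU_BACKEND=SYCL \
    -DCMAKE_C_COMPILER=$(which icx) \
    -DCMAKE_CXX_COMPILER=$(which icpx) \
    -DAMReX_PARALLEL_LINK_JOBS=2
cmake --build build --parallel 2
```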
--- .github/workflows/intel.yml | 12 +++++--- Docs/sphinx_documentation/source/GPU.rst | 36 +++++++++++++----------- Tools/CMake/AMReXFlagsTargets.cmake | 2 +- Tools/CMake/AMReXOptions.cmake | 10 +++++++ Tools/CMake/AMReXSYCL.cmake | 6 ++++ 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index d86035d916e..6474214e0a2 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -44,7 +44,8 @@ jobs: -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which icpx) \ -DCMAKE_Fortran_COMPILER=$(which ifx) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s @@ -86,7 +87,8 @@ jobs: -DAMReX_GPU_BACKEND=SYCL \ -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which icpx) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s @@ -136,7 +138,8 @@ jobs: -DAMReX_GPU_BACKEND=SYCL \ -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s @@ -186,7 +189,8 @@ jobs: -DAMReX_SYCL_SUB_GROUP_SIZE=64 \ -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index aff060e9166..08297cb3e2a 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -217,7 +217,7 @@ variables to configure the build +------------------------------+-------------------------------------------------+-------------+-----------------+ | SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 | +------------------------------+-------------------------------------------------+-------------+-----------------+ - | SYCL_MAX_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. | + | SYCL_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. | +------------------------------+-------------------------------------------------+-------------+-----------------+ .. raw:: latex @@ -428,22 +428,24 @@ Below is an example configuration for SYCL: .. table:: AMReX SYCL-specific build options - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | Variable Name | Description | Default | Possible values | - +==============================+=================================================+=============+=================+ - | AMReX_SYCL_AOT | Enable SYCL ahead-of-time compilation | NO | YES, NO | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, | - | | | | AutoLarge | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. 
| - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | YES | YES, NO | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 | - +------------------------------+-------------------------------------------------+-------------+-----------------+ + +-------------------------------+----------------------------------------------+-------------+------------------+ + | Variable Name | Description | Default | Possible values | + +===============================+==============================================+=============+==================+ + | AMReX_SYCL_AOT | Enable SYCL ahead-of-time compilation | NO | YES, NO | + +-------------------------------+----------------------------------------------+-------------+------------------+ + | AMReX_SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, | + | | | | AutoLarge | + +-------------------------------+----------------------------------------------+-------------+------------------+ + | AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. | + +-------------------------------+----------------------------------------------+-------------+------------------+ + | AMReX_SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | YES | YES, NO | + +-------------------------------+----------------------------------------------+-------------+------------------+ + | AMReX_SYCL_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO | + +-------------------------------+----------------------------------------------+-------------+------------------+ + | AMReX_SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 | + +-------------------------------+----------------------------------------------+-------------+------------------+ + | AMReX_PARALLEL_LINK_JOBS | Specify number of parallel link jobs | 1 | positive integer | + +-------------------------------+----------------------------------------------+-------------+------------------+ .. 
raw:: latex \end{center} diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake index 9e3073cd53f..a2e86b2fbd3 100644 --- a/Tools/CMake/AMReXFlagsTargets.cmake +++ b/Tools/CMake/AMReXFlagsTargets.cmake @@ -89,7 +89,7 @@ target_compile_options( Flags_CXX $<${_cxx_appleclang_rwdbg}:> $<${_cxx_appleclang_rel}:> $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> - $<${_cxx_intelllvm_rwdbg}:-g1> + $<${_cxx_intelllvm_rwdbg}:-gline-tables-only -fdebug-info-for-profiling> # recommended by Intel VTune $<${_cxx_intelllvm_rel}:> ) diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake index 80196639984..e24244ea29a 100644 --- a/Tools/CMake/AMReXOptions.cmake +++ b/Tools/CMake/AMReXOptions.cmake @@ -213,6 +213,16 @@ if (AMReX_SYCL) endif() endif() + set(AMReX_PARALLEL_LINK_JOBS_DEFAULT 1) + if (DEFINED ENV{AMREX_PARALLEL_LINK_JOBS}) + set(AMReX_PARALLEL_LINK_JOBS_DEFAULT "$ENV{AMREX_PARALLEL_LINK_JOBS}") + endif() + set(AMReX_PARALLEL_LINK_JOBS ${AMReX_PARALLEL_LINK_JOBS_DEFAULT} + CACHE STRING "SYCL max parallel link jobs") + if (NOT AMReX_PARALLEL_LINK_JOBS GREATER_EQUAL 1 OR + NOT AMReX_PARALLEL_LINK_JOBS MATCHES "^[1-9][0-9]*$") + message(FATAL_ERROR "AMReX_PARALLEL_LINK_JOBS (${AMReX_PARALLEL_LINK_JOBS}) must be a positive integer") + endif() endif () # --- HIP ---- diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index a67571dc412..2b48f1c53fe 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -88,4 +88,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND "${CMAKE_BUILD_TYPE}" MATCHES "Debug" "$<${_cxx_sycl}:-fsycl-link-huge-device-code>" ) endif () +if (AMReX_PARALLEL_LINK_JOBS GREATER 1) + target_link_options( SYCL + INTERFACE + $<${_cxx_sycl}:-fsycl-max-parallel-link-jobs=${AMReX_PARALLEL_LINK_JOBS}>) +endif() + unset(_cxx_sycl) From 554b1ca3b00c3a56cbed4861bfa16c8074a78fc5 Mon Sep 17 00:00:00 2001 From: AlexanderSinn <64009254+AlexanderSinn@users.noreply.github.com> Date: Fri, 15 Dec 2023 01:21:16 +0100 Subject: [PATCH 068/186] PureSoA IdCpu fixes (#3671) ## Summary I noticed a few issues in AMReX while trying to update HiPACE++ for #3585. Additionally, I would like to point out that `ParticleTile` has `push_back_real` and `push_back_int` functions but for PureSoA there is no `push_back_idcpu`, however this is not added in this PR. 
## Additional background ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_Particle.H | 9 --------- Src/Particle/AMReX_ParticleTile.H | 16 +++++++++++----- Src/Particle/AMReX_StructOfArrays.H | 3 +-- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 16004d1231e..4ae8b7c4365 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -24,15 +24,6 @@ namespace constexpr Long NoSplitParticleID = GhostParticleID - 4; } - /** Used for 32bit int particle Ids, as in pure SoA layout */ - namespace IntParticleIds { - constexpr int GhostParticleID = 2147483647; // 2**31-1 - constexpr int VirtualParticleID = GhostParticleID - 1; - constexpr int LastParticleID = GhostParticleID - 2; - constexpr int DoSplitParticleID = GhostParticleID - 3; - constexpr int NoSplitParticleID = GhostParticleID - 4; - } - using namespace LongParticleIds; } diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index a1bdbdd56e2..1048df8724d 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -458,7 +458,7 @@ SoAParticle::NextID () #endif next = the_next_id++; - if (next > IntParticleIds::LastParticleID) { + if (next > LongParticleIds::LastParticleID) { amrex::Abort("SoAParticle::NextID() -- too many particles"); } @@ -470,7 +470,7 @@ int SoAParticle::UnprotectedNextID () { int next = the_next_id++; - if (next > IntParticleIds::LastParticleID) { + if (next > LongParticleIds::LastParticleID) { amrex::Abort("SoAParticle::NextID() -- too many particles"); } return next; @@ -1039,7 +1039,9 @@ struct ParticleTile void shrink_to_fit () { - if constexpr (!ParticleType::is_soa_particle) { + if constexpr (ParticleType::is_soa_particle) { + GetStructOfArrays().GetIdCPUData().shrink_to_fit(); + } else { m_aos_tile().shrink_to_fit(); } for (int j = 0; j < NumRealComps(); ++j) @@ -1058,7 +1060,9 @@ struct ParticleTile Long capacity () const { Long nbytes = 0; - if constexpr (!ParticleType::is_soa_particle) { + if constexpr (ParticleType::is_soa_particle) { + nbytes += GetStructOfArrays().GetIdCPUData().capacity() * sizeof(uint64_t); + } else { nbytes += m_aos_tile().capacity() * sizeof(ParticleType); } for (int j = 0; j < NumRealComps(); ++j) @@ -1077,7 +1081,9 @@ struct ParticleTile void swap (ParticleTile& other) { - if constexpr (!ParticleType::is_soa_particle) { + if constexpr (ParticleType::is_soa_particle) { + GetStructOfArrays().GetIdCPUData().swap(other.GetStructOfArrays().GetIdCPUData()); + } else { m_aos_tile().swap(other.GetArrayOfStructs()()); } for (int j = 0; j < NumRealComps(); ++j) diff --git a/Src/Particle/AMReX_StructOfArrays.H b/Src/Particle/AMReX_StructOfArrays.H index 6cd498e20a2..4de35e085ca 100644 --- a/Src/Particle/AMReX_StructOfArrays.H +++ b/Src/Particle/AMReX_StructOfArrays.H @@ -195,13 +195,12 @@ struct StructOfArrays { for (int i = 0; i < int(m_runtime_idata.size()); ++i) { m_runtime_idata[i].resize(count); } } - [[nodiscard]] IdCPU* idcpuarray () { + [[nodiscard]] uint64_t* idcpuarray () { if constexpr (use64BitIdCpu == true) { return m_idcpu.dataPtr(); } else { return nullptr; } - } [[nodiscard]] GpuArray realarray () From 
3497f016d9204adf2b406659100adb2803118468 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 15 Dec 2023 17:27:48 -0800 Subject: [PATCH 069/186] Fix CI for ROCm 6.0 (#3673) Need to explicitly install hiprand package in CI because it's now a standalone project, not a submodule for rocRand according to the release notes. --- .github/workflows/dependencies/dependencies_hip.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh index 852342e4ac1..36df2f384ba 100755 --- a/.github/workflows/dependencies/dependencies_hip.sh +++ b/.github/workflows/dependencies/dependencies_hip.sh @@ -43,7 +43,8 @@ sudo apt-get install -y --no-install-recommends \ roctracer-dev \ rocprofiler-dev \ rocrand-dev \ - rocprim-dev + rocprim-dev \ + hiprand-dev # activate # From 78c4fdaf95175f992b87e670780f5cf0d01d0783 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Mon, 18 Dec 2023 11:42:06 -0800 Subject: [PATCH 070/186] Improve ParIter docs (#3676) Show how to get the ParIter type from the ParticleContainer - should be less confusing to new users. The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Docs/sphinx_documentation/source/Particle.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Docs/sphinx_documentation/source/Particle.rst b/Docs/sphinx_documentation/source/Particle.rst index be8292c772b..e3a28591a72 100644 --- a/Docs/sphinx_documentation/source/Particle.rst +++ b/Docs/sphinx_documentation/source/Particle.rst @@ -86,7 +86,8 @@ tracked as the particle positions change. To do this, we provide the :: - ParticleContainer<3, 2, 4, 4> mypc; + using MyParticleContainer = ParticleContainer<3, 2, 4, 4>; + MyParticleContainer mypc; Like the :cpp:`Particle` class itself, the :cpp:`ParticleContainer` class is templated. The first two template parameters have the same meaning as @@ -375,8 +376,8 @@ example, to iterate over all the AoS data: :: - using MyParIter = ConstParIter<2*BL_SPACEDIM>; - for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { + using MyParConstIter = MyParticleContainer::ParConstIterType; + for (MyParConstIter pti(pc, lev); pti.isValid(); ++pti) { const auto& particles = pti.GetArrayOfStructs(); for (const auto& p : particles) { // do stuff with p... @@ -392,7 +393,7 @@ skipped. 
You can also access the SoA data using the :math:`ParIter` as follows: :: - using MyParIter = ParIter<0, 0, 2, 2>; + using MyParIter = MyParticleContainer::ParIterType; for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { auto& particle_attributes = pti.GetStructOfArrays(); RealVector& real_comp0 = particle_attributes.GetRealData(0); From d1e55fbab7c5d5cb72d3c448a91430a568628b9e Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Mon, 18 Dec 2023 12:01:08 -0800 Subject: [PATCH 071/186] Add PTD version of getParticleCell (#3675) The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_ParticleUtil.H | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 182802e51f2..682a82450f0 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -371,6 +371,26 @@ IntVect getParticleCell (P const& p, return iv; } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +IntVect getParticleCell (PTD const& ptd, int i, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const Box& domain) noexcept +{ + if constexpr (PTD::ParticleType::is_soa_particle) + { + IntVect iv( + AMREX_D_DECL(int(amrex::Math::floor((ptd.m_rdata[0][i]-plo[0])*dxi[0])), + int(amrex::Math::floor((ptd.m_rdata[1][i]-plo[1])*dxi[1])), + int(amrex::Math::floor((ptd.m_rdata[2][i]-plo[2])*dxi[2])))); + iv += domain.smallEnd(); + return iv; + } else { + return getParticleCell(ptd.m_aos[i], plo, dxi, domain);; + } +} + struct DefaultAssignor { From ef38229189e3213f992a2e89dbe304fb49db9287 Mon Sep 17 00:00:00 2001 From: Max Katz Date: Mon, 18 Dec 2023 23:21:03 -0500 Subject: [PATCH 072/186] Add a for loop that is unrolled at compile time (#3674) ## Summary The constexpr_for function is fully unrolled at compile time. This is useful for relatively short loops where some of the functions inside the loop are known to possible to evaluate at compile time and may be relatively expensive, so evaluating them at compile time rather than at runtime may be beneficial for performance reasons. ## Additional background This has been used in AMReX-Astro/Microphysics successfully in the context of evaluating some nuclear reaction network quantities at compile time. 
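As a minimal usage sketch (illustrative only, not part of this patch): the lambda passed to `constexpr_for` receives a `std::integral_constant`, so its `::value` member can be used wherever a compile-time constant is required, for example as a tuple index.

```cpp
#include <AMReX_Loop.H>   // assumes an AMReX build that includes this patch
#include <tuple>
#include <iostream>

int main ()
{
    std::tuple<int, double, long> t{1, 2.5, 3L};
    double sum = 0.0;
    amrex::constexpr_for<0, 3>([&] (auto i) {
        // decltype(i)::value is a constant expression, so it can index the tuple.
        sum += static_cast<double>(std::get<decltype(i)::value>(t));
    });
    std::cout << sum << '\n';   // prints 6.5
}
```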
## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_Loop.H | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Src/Base/AMReX_Loop.H b/Src/Base/AMReX_Loop.H index 84b39107e45..19e1c3e5191 100644 --- a/Src/Base/AMReX_Loop.H +++ b/Src/Base/AMReX_Loop.H @@ -211,6 +211,30 @@ void LoopConcurrentOnCpu (Box const& bx, int ncomp, F&& f) noexcept }}}} } +// Implementation of "constexpr for" based on +// https://artificial-mind.net/blog/2020/10/31/constexpr-for +// +// Approximates what one would get from a compile-time +// unrolling of the loop +// for (int i = 0; i < N; ++i) { +// f(i); +// } +// +// The mechanism is recursive: we evaluate f(i) at the current +// i and then call the for loop at i+1. f() is a lambda function +// that provides the body of the loop and takes only an integer +// i as its argument. + +template +AMREX_GPU_HOST_DEVICE AMREX_INLINE +constexpr void constexpr_for (F&& f) +{ + if constexpr (I < N) { + f(std::integral_constant()); + constexpr_for(f); + } +} + #include } From 85462ce91e038451894daef45d339c4374c0b6ac Mon Sep 17 00:00:00 2001 From: Ankith Anil Das <46082093+ankithadas@users.noreply.github.com> Date: Thu, 21 Dec 2023 05:58:00 +1100 Subject: [PATCH 073/186] Eliminating Matrix operations in MLMG CG bottom solver if initial vector is zero (#3668) A matrix multiplication and a few copy operations can be avoided if the input vector is zero. MLMG calls all the the bottom solvers with zeroed `x` vector, and thus the initial residual calculation `b - Ax` is `b`. Furthermore, it also eliminates the memory requirement of storing the initial vector. --- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 60 +++++++++++++++++------ Src/LinearSolvers/MLMG/AMReX_MLMG.H | 1 + 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 3afa56ee245..ff9198215fa 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -42,6 +42,16 @@ public: void setMaxIter (int _maxiter) { maxiter = _maxiter; } [[nodiscard]] int getMaxIter () const { return maxiter; } + + /** + * Is the initial guess provided to the solver zero ? + * If so, set this to true. + * The solver will avoid a few operations if this is true. + * Default is false. 
+ */ + void setInitSolnZeroed (bool _sol_zeroed) { initial_vec_zeroed = _sol_zeroed; } + [[nodiscard]] bool getInitSolnZeroed () const { return initial_vec_zeroed; } + void setNGhost(int _nghost) {nghost = IntVect(_nghost);} [[nodiscard]] int getNGhost() {return nghost[0];} @@ -62,6 +72,7 @@ private: int maxiter = 100; IntVect nghost = IntVect(0); int iter = -1; + bool initial_vec_zeroed = false; }; template @@ -95,21 +106,28 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) p.setVal(RT(0.0)); // Make sure all entries are initialized to avoid errors r.setVal(RT(0.0)); - MF sorig = Lp.make(amrlev, mglev, nghost); MF rh = Lp.make(amrlev, mglev, nghost); MF v = Lp.make(amrlev, mglev, nghost); MF t = Lp.make(amrlev, mglev, nghost); - Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); + + MF sorig; + + if ( initial_vec_zeroed ) { + r.LocalCopy(rhs,0,0,ncomp,nghost); + } else { + sorig = Lp.make(amrlev, mglev, nghost); + + Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); + + sorig.LocalCopy(sol,0,0,ncomp,nghost); + sol.setVal(RT(0.0)); + } // Then normalize Lp.normalize(amrlev, mglev, r); - - sorig.LocalCopy(sol,0,0,ncomp,nghost); rh.LocalCopy (r ,0,0,ncomp,nghost); - sol.setVal(RT(0.0)); - RT rnorm = norm_inf(r); const RT rnorm0 = rnorm; @@ -238,12 +256,16 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + if ( !initial_vec_zeroed ) { + sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + } } else { sol.setVal(RT(0.0)); - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + if ( !initial_vec_zeroed ) { + sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + } } return ret; @@ -260,15 +282,21 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) MF p = Lp.make(amrlev, mglev, sol.nGrowVect()); p.setVal(RT(0.0)); - MF sorig = Lp.make(amrlev, mglev, nghost); MF r = Lp.make(amrlev, mglev, nghost); MF q = Lp.make(amrlev, mglev, nghost); - sorig.LocalCopy(sol,0,0,ncomp,nghost); + MF sorig; + + if ( initial_vec_zeroed ) { + r.LocalCopy(rhs,0,0,ncomp,nghost); + } else { + sorig = Lp.make(amrlev, mglev, nghost); - Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); + Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); - sol.setVal(RT(0.0)); + sorig.LocalCopy(sol,0,0,ncomp,nghost); + sol.setVal(RT(0.0)); + } RT rnorm = norm_inf(r); const RT rnorm0 = rnorm; @@ -364,12 +392,16 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + if ( !initial_vec_zeroed ) { + sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + } } else { sol.setVal(RT(0.0)); - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + if ( !initial_vec_zeroed ) { + sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + } } return ret; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 70e7e121486..84adba7dfdc 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -1526,6 +1526,7 @@ MLMGT::bottomSolveWithCG (MF& x, const MF& b, typename MLCGSolverT::Type cg_solver.setSolver(type); cg_solver.setVerbose(bottom_verbose); cg_solver.setMaxIter(bottom_maxiter); + cg_solver.setInitSolnZeroed(true); if (cf_strategy == CFStrategy::ghostnodes) { cg_solver.setNGhost(linop.getNGrow()); } int ret = cg_solver.solve(x, b, 
bottom_reltol, bottom_abstol); From 3407e877a7b219546c0ef12bef287845f5cb9f05 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 20 Dec 2023 12:11:53 -0800 Subject: [PATCH 074/186] Add a few free functions for MLMG (#3680) These are useful when we use Array as the data type for MLMG. --- Src/Base/AMReX_FabArrayBase.H | 5 + Src/Base/AMReX_FabArrayBase.cpp | 20 ++++ Src/Base/AMReX_FabArrayUtility.H | 187 +++++++++++++++++++++++++++++++ Src/Base/AMReX_TypeTraits.H | 12 ++ 4 files changed, 224 insertions(+) diff --git a/Src/Base/AMReX_FabArrayBase.H b/Src/Base/AMReX_FabArrayBase.H index d8bc4411874..e2cf0ed9641 100644 --- a/Src/Base/AMReX_FabArrayBase.H +++ b/Src/Base/AMReX_FabArrayBase.H @@ -721,6 +721,11 @@ public: }; +[[nodiscard]] int nComp (FabArrayBase const& fa); +[[nodiscard]] IntVect nGrowVect (FabArrayBase const& fa); +[[nodiscard]] BoxArray const& boxArray (FabArrayBase const& fa); +[[nodiscard]] DistributionMapping const& DistributionMap (FabArrayBase const& fa); + #ifdef BL_USE_MPI bool CheckRcvStats (Vector& recv_stats, const Vector& recv_size, int tag); #endif diff --git a/Src/Base/AMReX_FabArrayBase.cpp b/Src/Base/AMReX_FabArrayBase.cpp index 8dd8275f66a..6997f3489dd 100644 --- a/Src/Base/AMReX_FabArrayBase.cpp +++ b/Src/Base/AMReX_FabArrayBase.cpp @@ -2699,4 +2699,24 @@ FabArrayBase::flushParForCache () #endif +int nComp (FabArrayBase const& fa) +{ + return fa.nComp(); +} + +IntVect nGrowVect (FabArrayBase const& fa) +{ + return fa.nGrowVect(); +} + +BoxArray const& boxArray (FabArrayBase const& fa) +{ + return fa.boxArray(); +} + +DistributionMapping const& DistributionMap (FabArrayBase const& fa) +{ + return fa.DistributionMap(); +} + } diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H index ca80a070f45..0897c57ed4f 100644 --- a/Src/Base/AMReX_FabArrayUtility.H +++ b/Src/Base/AMReX_FabArrayUtility.H @@ -1602,6 +1602,193 @@ Dot (FabArray const& x, int xcomp, FabArray const& y, int ycomp, int n return sm; } +//! dst = val +template ,int> = 0> +void setVal (MF& dst, typename MF::value_type val) +{ + dst.setVal(val); +} + +//! dst = val in ghost cells. +template ,int> = 0> +void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp) +{ + dst.setBndry(val, scomp, ncomp); +} + +//! dst = src +template && + IsMultiFabLike_v, int> = 0> +void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp, + int ncomp, IntVect const& nghost) +{ + amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost); +} + +//! dst += src +template ,int> = 0> +void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp, + int ncomp, IntVect const& nghost) +{ + amrex::Add(dst, src, scomp, dcomp, ncomp, nghost); +} + +//! dst += a * src +template ,int> = 0> +void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp, + int ncomp, IntVect const& nghost) +{ + MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost); +} + +//! dst = src + a * dst +template ,int> = 0> +void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp, + int ncomp, IntVect const& nghost) +{ + MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost); +} + +//! 
dst = src w/ MPI communication +template , int> = 0> +void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp, + IntVect const& ng_src = IntVect(0), + IntVect const& ng_dst = IntVect(0), + Periodicity const& period = Periodicity::NonPeriodic()) +{ + dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period); +} + +template , int> = 0> +[[nodiscard]] typename MF::value_type +norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost, + bool local = false) +{ + return mf.norminf(scomp, ncomp, nghost, local); +} + +//! dst = val +template ,int> = 0> +void setVal (Array& dst, typename MF::value_type val) +{ + for (auto& mf: dst) { + mf.setVal(val); + } +} + +//! dst = val in ghost cells. +template ,int> = 0> +void setBndry (Array& dst, typename MF::value_type val, int scomp, int ncomp) +{ + for (auto& mf : dst) { + mf.setBndry(val, scomp, ncomp); + } +} + +//! dst = src +template && + IsMultiFabLike_v, int> = 0> +void LocalCopy (Array& dst, Array const& src, int scomp, int dcomp, + int ncomp, IntVect const& nghost) +{ + for (std::size_t i = 0; i < N; ++i) { + amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost); + } +} + +//! dst += src +template ,int> = 0> +void LocalAdd (Array& dst, Array const& src, int scomp, int dcomp, + int ncomp, IntVect const& nghost) +{ + for (std::size_t i = 0; i < N; ++i) { + amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost); + } +} + +//! dst += a * src +template ,int> = 0> +void Saxpy (Array& dst, typename MF::value_type a, + Array const& src, int scomp, int dcomp, int ncomp, + IntVect const& nghost) +{ + for (std::size_t i = 0; i < N; ++i) { + MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost); + } +} + +//! dst = src + a * dst +template ,int> = 0> +void Xpay (Array& dst, typename MF::value_type a, + Array const& src, int scomp, int dcomp, int ncomp, + IntVect const& nghost) +{ + for (std::size_t i = 0; i < N; ++i) { + MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost); + } +} + +//! 
dst = src w/ MPI communication +template , int> = 0> +void ParallelCopy (Array& dst, Array const& src, + int scomp, int dcomp, int ncomp, + IntVect const& ng_src = IntVect(0), + IntVect const& ng_dst = IntVect(0), + Periodicity const& period = Periodicity::NonPeriodic()) +{ + for (std::size_t i = 0; i < N; ++i) { + dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period); + } +} + +template , int> = 0> +[[nodiscard]] typename MF::value_type +norminf (Array const& mf, int scomp, int ncomp, IntVect const& nghost, + bool local = false) +{ + auto r = typename MF::value_type(0); + for (std::size_t i = 0; i < N; ++i) { + auto tmp = mf[i].norminf(scomp, ncomp, nghost, true); + r = std::max(r,tmp); + } + if (!local) { + ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub()); + } + return r; +} + +template && (N > 0), + int> = 0> +[[nodiscard]] int nComp (Array const& mf) +{ + return mf[0].nComp(); +} + +template && (N > 0), + int> = 0> +[[nodiscard]] IntVect nGrowVect (Array const& mf) +{ + return mf[0].nGrowVect(); +} + +template && (N > 0), + int> = 0> +[[nodiscard]] BoxArray const& +boxArray (Array const& mf) +{ + return mf[0].boxArray(); +} + +template && (N > 0), + int> = 0> +[[nodiscard]] DistributionMapping const& +DistributionMap (Array const& mf) +{ + return mf[0].DistributionMap(); +} + } #endif diff --git a/Src/Base/AMReX_TypeTraits.H b/Src/Base/AMReX_TypeTraits.H index 222576f05f5..fbcb7a2c0e3 100644 --- a/Src/Base/AMReX_TypeTraits.H +++ b/Src/Base/AMReX_TypeTraits.H @@ -37,6 +37,18 @@ namespace amrex template inline constexpr bool IsFabArray_v = IsFabArray::value; + template + struct IsMultiFabLike : std::false_type {}; + // + template + struct IsMultiFabLike && + IsBaseFab_v > > + : std::true_type {}; + // + template + inline constexpr bool IsMultiFabLike_v = IsMultiFabLike::value; + + template using EnableIf_t = typename std::enable_if::type; From 75571e2dcbf2417529c5ed8e24113580e8e1f3f1 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 20 Dec 2023 19:04:57 -0800 Subject: [PATCH 075/186] MLMG: Use free functions instead of MF member functions (#3681) Note that the use of unqualified functions (e.g., setVal instead of amrex::setVal) is intentional. With ADL, these calls in MLMG could work with user defined data. 
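As a hedged illustration of the ADL point (hypothetical type and namespace, not taken from this patch): an unqualified call inside a function template resolves against overloads found in the namespace of the argument's type, which is what lets MLMG's unqualified `setVal`, `LocalCopy`, etc. pick up user-provided overloads for user-defined data types.

```cpp
// Sketch only: MyData and mysolver are made-up names; the real MLMG calls look
// like setVal(mf, ...) with MF a user-supplied template parameter.
#include <iostream>

namespace mysolver {
    struct MyData { double val = 0.0; };             // user-defined "MultiFab-like" type
    void setVal (MyData& d, double v) { d.val = v; } // found via argument-dependent lookup
}

template <class MF>
void zero_it (MF& mf)
{
    setVal(mf, 0.0);   // unqualified: ADL searches namespace mysolver because MF is mysolver::MyData
}

int main ()
{
    mysolver::MyData d;
    zero_it(d);
    std::cout << d.val << '\n';   // prints 0
}
```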
--- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 72 ++++---- Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 83 ++++++--- Src/LinearSolvers/MLMG/AMReX_MLMG.H | 204 +++++++++++----------- 3 files changed, 199 insertions(+), 160 deletions(-) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index ff9198215fa..3bfab3c9f61 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -12,8 +12,8 @@ class MLCGSolverT { public: - using FAB = typename MF::fab_type; - using RT = typename MF::value_type; + using FAB = typename MLLinOpT::FAB; + using RT = typename MLLinOpT::RT; enum struct Type { BiCGStab, CG }; @@ -99,12 +99,12 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { BL_PROFILE("MLCGSolver::bicgstab"); - const int ncomp = sol.nComp(); + const int ncomp = nComp(sol); - MF p = Lp.make(amrlev, mglev, sol.nGrowVect()); - MF r = Lp.make(amrlev, mglev, sol.nGrowVect()); - p.setVal(RT(0.0)); // Make sure all entries are initialized to avoid errors - r.setVal(RT(0.0)); + MF p = Lp.make(amrlev, mglev, nGrowVect(sol)); + MF r = Lp.make(amrlev, mglev, nGrowVect(sol)); + setVal(p, RT(0.0)); // Make sure all entries are initialized to avoid errors + setVal(r, RT(0.0)); MF rh = Lp.make(amrlev, mglev, nghost); MF v = Lp.make(amrlev, mglev, nghost); @@ -114,19 +114,19 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) MF sorig; if ( initial_vec_zeroed ) { - r.LocalCopy(rhs,0,0,ncomp,nghost); + LocalCopy(r,rhs,0,0,ncomp,nghost); } else { sorig = Lp.make(amrlev, mglev, nghost); Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); - sorig.LocalCopy(sol,0,0,ncomp,nghost); - sol.setVal(RT(0.0)); + LocalCopy(sorig,sol,0,0,ncomp,nghost); + setVal(sol, RT(0.0)); } // Then normalize Lp.normalize(amrlev, mglev, r); - rh.LocalCopy (r ,0,0,ncomp,nghost); + LocalCopy(rh, r, 0,0,ncomp,nghost); RT rnorm = norm_inf(r); const RT rnorm0 = rnorm; @@ -159,13 +159,13 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) } if ( iter == 1 ) { - p.LocalCopy(r,0,0,ncomp,nghost); + LocalCopy(p,r,0,0,ncomp,nghost); } else { const RT beta = (rho/rho_1)*(alpha/omega); - MF::Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v - MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p + Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v + Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p } Lp.apply(amrlev, mglev, v, p, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); Lp.normalize(amrlev, mglev, v); @@ -179,8 +179,8 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { ret = 2; break; } - MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p - MF::Saxpy(r, -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v + Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p + Saxpy(r, -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v rnorm = norm_inf(r); rnorm = norm_inf(r); @@ -216,8 +216,8 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { ret = 3; break; } - MF::Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r - MF::Saxpy(r, -omega, t, 0, 0, ncomp, nghost); // r += -omega * t + Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r + Saxpy(r, -omega, t, 0, 0, ncomp, nghost); // r += -omega * t rnorm = norm_inf(r); @@ -257,14 +257,14 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT 
eps_abs) if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { if ( !initial_vec_zeroed ) { - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + LocalAdd(sol, sorig, 0, 0, ncomp, nghost); } } else { - sol.setVal(RT(0.0)); + setVal(sol, RT(0.0)); if ( !initial_vec_zeroed ) { - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + LocalAdd(sol, sorig, 0, 0, ncomp, nghost); } } @@ -277,10 +277,10 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { BL_PROFILE("MLCGSolver::cg"); - const int ncomp = sol.nComp(); + const int ncomp = nComp(sol); - MF p = Lp.make(amrlev, mglev, sol.nGrowVect()); - p.setVal(RT(0.0)); + MF p = Lp.make(amrlev, mglev, nGrowVect(sol)); + setVal(p, RT(0.0)); MF r = Lp.make(amrlev, mglev, nghost); MF q = Lp.make(amrlev, mglev, nghost); @@ -288,14 +288,14 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) MF sorig; if ( initial_vec_zeroed ) { - r.LocalCopy(rhs,0,0,ncomp,nghost); + LocalCopy(r,rhs,0,0,ncomp,nghost); } else { sorig = Lp.make(amrlev, mglev, nghost); Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT::BCMode::Homogeneous); - sorig.LocalCopy(sol,0,0,ncomp,nghost); - sol.setVal(RT(0.0)); + LocalCopy(sorig,sol,0,0,ncomp,nghost); + setVal(sol, RT(0.0)); } RT rnorm = norm_inf(r); @@ -330,12 +330,12 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) } if (iter == 1) { - p.LocalCopy(r,0,0,ncomp,nghost); + LocalCopy(p,r,0,0,ncomp,nghost); } else { RT beta = rho/rho_1; - MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p + Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p } Lp.apply(amrlev, mglev, q, p, MLLinOpT::BCMode::Homogeneous, MLLinOpT::StateMode::Correction); @@ -357,8 +357,8 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) << " rho " << rho << " alpha " << alpha << '\n'; } - MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p - MF::Saxpy(r, -alpha, q, 0, 0, ncomp, nghost); // r += -alpha * q + Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p + Saxpy(r, -alpha, q, 0, 0, ncomp, nghost); // r += -alpha * q rnorm = norm_inf(r); if ( verbose > 2 ) @@ -393,14 +393,14 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { if ( !initial_vec_zeroed ) { - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + LocalAdd(sol, sorig, 0, 0, ncomp, nghost); } } else { - sol.setVal(RT(0.0)); + setVal(sol, RT(0.0)); if ( !initial_vec_zeroed ) { - sol.LocalAdd(sorig, 0, 0, ncomp, nghost); + LocalAdd(sol, sorig, 0, 0, ncomp, nghost); } } @@ -422,8 +422,8 @@ template auto MLCGSolverT::norm_inf (const MF& res, bool local) -> RT { - int ncomp = res.nComp(); - RT result = res.norminf(0,ncomp,IntVect(0),true); + int ncomp = nComp(res); + RT result = norminf(res,0,ncomp,IntVect(0),true); if (!local) { BL_PROFILE("MLCGSolver::ParallelAllReduce"); ParallelAllReduce::Max(result, Lp.BottomCommunicator()); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index b8aa71eebd0..f0dca07f3ab 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -85,6 +85,15 @@ struct LinOpEnumType enum struct Location { FaceCenter, FaceCentroid, CellCenter, CellCentroid }; }; +template struct LinOpData {}; +// +template +struct LinOpData > > +{ + using fab_type = typename T::fab_type; + using value_type = typename T::value_type; +}; + template class MLMGT; template class MLCGSolverT; template class MLPoissonT; @@ -100,8 +109,8 @@ 
public: template friend class MLPoissonT; template friend class MLABecLaplacianT; - using FAB = typename MF::fab_type; - using RT = typename MF::value_type; + using FAB = typename LinOpData::fab_type; + using RT = typename LinOpData::value_type; using BCType = LinOpBCType; using BCMode = LinOpEnumType::BCMode; @@ -1375,13 +1384,18 @@ template void MLLinOpT::make (Vector >& mf, IntVect const& ng) const { - mf.clear(); - mf.resize(m_num_amr_levels); - for (int alev = 0; alev < m_num_amr_levels; ++alev) { - mf[alev].resize(m_num_mg_levels[alev]); - for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) { - mf[alev][mlev] = make(alev, mlev, ng); + if constexpr (IsMultiFabLike_v) { + mf.clear(); + mf.resize(m_num_amr_levels); + for (int alev = 0; alev < m_num_amr_levels; ++alev) { + mf[alev].resize(m_num_mg_levels[alev]); + for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) { + mf[alev][mlev] = make(alev, mlev, ng); + } } + } else { + amrex::ignore_unused(mf, ng); + amrex::Abort("MLLinOpT::make: how did we get here?"); } } @@ -1389,39 +1403,62 @@ template MF MLLinOpT::make (int amrlev, int mglev, IntVect const& ng) const { - return MF(amrex::convert(m_grids[amrlev][mglev], m_ixtype), - m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(), - *m_factory[amrlev][mglev]); + if constexpr (IsMultiFabLike_v) { + return MF(amrex::convert(m_grids[amrlev][mglev], m_ixtype), + m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(), + *m_factory[amrlev][mglev]); + } else { + amrex::ignore_unused(amrlev, mglev, ng); + amrex::Abort("MLLinOpT::make: how did we get here?"); + return {}; + } } template MF MLLinOpT::makeAlias (MF const& mf) const { - return MF(mf, amrex::make_alias, 0, mf.nComp()); + if constexpr (IsMultiFabLike_v) { + return MF(mf, amrex::make_alias, 0, mf.nComp()); + } else { + amrex::ignore_unused(mf); + amrex::Abort("MLLinOpT::makeAlias: how did we get here?"); + return {}; + } } template MF MLLinOpT::makeCoarseMG (int amrlev, int mglev, IntVect const& ng) const { - BoxArray cba = m_grids[amrlev][mglev]; - IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev]; - cba.coarsen(ratio); - cba.convert(m_ixtype); - return MF(cba, m_dmap[amrlev][mglev], getNComp(), ng); - + if constexpr (IsMultiFabLike_v) { + BoxArray cba = m_grids[amrlev][mglev]; + IntVect ratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[mglev]; + cba.coarsen(ratio); + cba.convert(m_ixtype); + return MF(cba, m_dmap[amrlev][mglev], getNComp(), ng); + } else { + amrex::ignore_unused(amrlev, mglev, ng); + amrex::Abort("MLLinOpT::makeCoarseMG: how did we get here?"); + return {}; + } } template MF MLLinOpT::makeCoarseAmr (int famrlev, IntVect const& ng) const { - BoxArray cba = m_grids[famrlev][0]; - IntVect ratio(AMRRefRatio(famrlev-1)); - cba.coarsen(ratio); - cba.convert(m_ixtype); - return MF(cba, m_dmap[famrlev][0], getNComp(), ng); + if constexpr (IsMultiFabLike_v) { + BoxArray cba = m_grids[famrlev][0]; + IntVect ratio(AMRRefRatio(famrlev-1)); + cba.coarsen(ratio); + cba.convert(m_ixtype); + return MF(cba, m_dmap[famrlev][0], getNComp(), ng); + } else { + amrex::ignore_unused(famrlev, ng); + amrex::Abort("MLLinOpT::makeCoarseAmr: how did we get here?"); + return {}; + } } template diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 84adba7dfdc..9bfc2f0007d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -21,8 +21,8 @@ public: template friend class MLCGSolverT; - using FAB = typename MF::fab_type; - using RT = typename MF::value_type; + using FAB = typename MLLinOpT::FAB; + using RT = typename MLLinOpT::RT; using BCMode = typename MLLinOpT::BCMode; using Location = typename MLLinOpT::Location; @@ -507,7 +507,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, for (int alev = 0; alev < namrlevs; ++alev) { if (!sol_is_alias[alev]) { - a_sol[alev]->LocalCopy(sol[alev], 0, 0, ncomp, ng_back); + LocalCopy(*a_sol[alev], sol[alev], 0, 0, ncomp, ng_back); } } @@ -541,11 +541,11 @@ MLMGT::getGradSolution (const Vector >& a_grad_so Array grad_sol; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { auto const& amf = *(a_grad_sol[alev][idim]); - grad_sol[idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0); + grad_sol[idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0); } linop.compGrad(alev, GetArrOfPtrs(grad_sol), sol[alev], a_loc); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - a_grad_sol[alev][idim]->LocalCopy(grad_sol[idim], 0, 0, ncomp, IntVect(0)); + LocalCopy(*a_grad_sol[alev][idim], grad_sol[idim], 0, 0, ncomp, IntVect(0)); } } } @@ -578,13 +578,13 @@ MLMGT::getFluxes (const Vector >& a_flux, for (int ilev = 0; ilev < namrlevs; ++ilev) { for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { auto const& amf = *(a_flux[ilev][idim]); - fluxes[ilev][idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0); + fluxes[ilev][idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0); } } getFluxes(GetVecOfArrOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc); for (int ilev = 0; ilev < namrlevs; ++ilev) { for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - a_flux[ilev][idim]->LocalCopy(fluxes[ilev][idim], 0, 0, ncomp, IntVect(0)); + LocalCopy(*a_flux[ilev][idim], fluxes[ilev][idim], 0, 0, ncomp, IntVect(0)); } } } @@ -618,14 +618,14 @@ MLMGT::getFluxes (const Vector >& a_flux, for (int ilev = 0; ilev < namrlevs; ++ilev) { for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { auto const& amf = *(a_flux[ilev][idim]); - fluxes[ilev][idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0); + fluxes[ilev][idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0); } - sol[ilev].LocalCopy(*a_sol[ilev], 0, 0, ncomp, sol[ilev].nGrowVect()); + LocalCopy(sol[ilev], *a_sol[ilev], 0, 0, ncomp, nGrowVect(sol[ilev])); } linop.getFluxes(GetVecOfArrOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc); for 
(int ilev = 0; ilev < namrlevs; ++ilev) { for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - a_flux[ilev][idim]->LocalCopy(fluxes[ilev][idim], 0, 0, ncomp, IntVect(0)); + LocalCopy(*a_flux[ilev][idim], fluxes[ilev][idim], 0, 0, ncomp, IntVect(0)); } } } @@ -653,11 +653,11 @@ MLMGT::getFluxes (const Vector & a_flux, Location a_loc) Vector fluxes(namrlevs); for (int ilev = 0; ilev < namrlevs; ++ilev) { auto const& amf = *a_flux[ilev]; - fluxes[ilev].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0); + fluxes[ilev].define(boxArray(amf), DistributionMap(amf), ncomp, 0); } getFluxes(GetVecOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc); for (int ilev = 0; ilev < namrlevs; ++ilev) { - a_flux[ilev]->LocalCopy(fluxes[ilev], 0, 0, ncomp, IntVect(0)); + LocalCopy(*a_flux[ilev], fluxes[ilev], 0, 0, ncomp, IntVect(0)); } } } @@ -676,11 +676,11 @@ void MLMGT::getFluxes (const Vector & a_flux, const Vector& a_sol, Location /*a_loc*/) { - AMREX_ASSERT(a_flux[0]->nComp() >= AMREX_SPACEDIM); + AMREX_ASSERT(nComp(*a_flux[0]) >= AMREX_SPACEDIM); if constexpr (! std::is_same()) { for (int alev = 0; alev < namrlevs; ++alev) { - sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, sol[alev].nGrowVect()); + LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, nGrowVect(sol[alev])); } } @@ -718,11 +718,11 @@ MLMGT::getFluxes (const Vector & a_flux, Vector fluxes(namrlevs); for (int ilev = 0; ilev < namrlevs; ++ilev) { auto const& amf = *a_flux[ilev]; - fluxes[ilev].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0); + fluxes[ilev].define(boxArray(amf), DistributionMap(amf), ncomp, 0); } linop.getFluxes(GetVecOfPtrs(fluxes), GetVecOfPtrs(sol)); for (int ilev = 0; ilev < namrlevs; ++ilev) { - a_flux[ilev]->LocalCopy(fluxes[ilev], 0, 0, ncomp, IntVect(0)); + LocalCopy(*a_flux[ilev], fluxes[ilev], 0, 0, ncomp, IntVect(0)); } } } @@ -779,7 +779,7 @@ MLMGT::compResidual (const Vector& a_res, const Vector& a_sol, sol_is_alias.resize(namrlevs,true); for (int alev = 0; alev < namrlevs; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol) + if (cf_strategy == CFStrategy::ghostnodes || nGrowVect(*a_sol[alev]) == ng_sol) { sol[alev] = linop.makeAlias(*a_sol[alev]); sol_is_alias[alev] = true; @@ -790,7 +790,7 @@ MLMGT::compResidual (const Vector& a_res, const Vector& a_sol, { sol[alev] = linop.make(alev, 0, ng_sol); } - sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, IntVect(0)); + LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, IntVect(0)); } } @@ -808,9 +808,9 @@ MLMGT::compResidual (const Vector& a_res, const Vector& a_sol, const MF* prhs = a_rhs[alev]; #if (AMREX_SPACEDIM != 3) int nghost = (cf_strategy == CFStrategy::ghostnodes) ? 
linop.getNGrow(alev) : 0; - MF rhstmp(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, + MF rhstmp(boxArray(*prhs), DistributionMap(*prhs), ncomp, nghost, MFInfo(), *linop.Factory(alev)); - rhstmp.LocalCopy(*prhs, 0, 0, ncomp, IntVect(nghost)); + LocalCopy(rhstmp, *prhs, 0, 0, ncomp, IntVect(nghost)); linop.applyMetricTerm(alev, 0, rhstmp); linop.unimposeNeumannBC(alev, rhstmp); linop.applyInhomogNeumannTerm(alev, rhstmp); @@ -822,9 +822,9 @@ MLMGT::compResidual (const Vector& a_res, const Vector& a_sol, *a_res[alev+1], sol[alev+1], *a_rhs[alev+1]); if (linop.isCellCentered()) { #ifdef AMREX_USE_EB - amrex::EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); + EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); #else - amrex::average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); + average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); #endif } } @@ -858,7 +858,7 @@ MLMGT::apply (const Vector& out, const Vector& a_in) nghost = linop.getNGrow(alev); in[alev] = a_in[alev]; } - else if (a_in[alev]->nGrowVect() == ng_sol) + else if (nGrowVect(*a_in[alev]) == ng_sol) { in[alev] = a_in[alev]; } @@ -866,18 +866,18 @@ MLMGT::apply (const Vector& out, const Vector& a_in) { IntVect ng = ng_sol; if (cf_strategy == CFStrategy::ghostnodes) { ng = IntVect(nghost); } - in_raii[alev].define(a_in[alev]->boxArray(), - a_in[alev]->DistributionMap(), - a_in[alev]->nComp(), ng, + in_raii[alev].define(boxArray (*a_in[alev]), + DistributionMap(*a_in[alev]), + nComp (*a_in[alev]), ng, MFInfo(), *linop.Factory(alev)); - in_raii[alev].LocalCopy(*a_in[alev], 0, 0, ncomp, IntVect(nghost)); + LocalCopy(in_raii[alev], *a_in[alev], 0, 0, ncomp, IntVect(nghost)); in[alev] = &(in_raii[alev]); } - rh[alev].define(a_in[alev]->boxArray(), - a_in[alev]->DistributionMap(), - a_in[alev]->nComp(), nghost, MFInfo(), + rh[alev].define(boxArray (*a_in[alev]), + DistributionMap(*a_in[alev]), + nComp (*a_in[alev]), nghost, MFInfo(), *linop.Factory(alev)); - rh[alev].setVal(RT(0.0)); + setVal(rh[alev], RT(0.0)); } if (!linop_prepared) { @@ -901,9 +901,9 @@ MLMGT::apply (const Vector& out, const Vector& a_in) *out[alev+1], *in[alev+1], rh[alev+1]); if (linop.isCellCentered()) { #ifdef AMREX_USE_EB - amrex::EB_average_down(*out[alev+1], *out[alev], 0, out[alev]->nComp(), amrrr[alev]); + EB_average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]); #else - amrex::average_down(*out[alev+1], *out[alev], 0, out[alev]->nComp(), amrrr[alev]); + average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]); #endif } } @@ -970,10 +970,10 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& } else { - if (a_sol[alev]->nGrowVect() == ng_sol) { + if (nGrowVect(*a_sol[alev]) == ng_sol) { if constexpr (std::is_same()) { sol[alev] = linop.makeAlias(*a_sol[alev]); - sol[alev].setBndry(RT(0.0), 0, ncomp); + setBndry(sol[alev], RT(0.0), 0, ncomp); sol_is_alias[alev] = true; } } @@ -981,8 +981,8 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& if (!solve_called) { sol[alev] = linop.make(alev, 0, ng_sol); } - sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, IntVect(0)); - sol[alev].setBndry(RT(0.0), 0, ncomp); + LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, IntVect(0)); + setBndry(sol[alev], RT(0.0), 0, ncomp); } } } @@ -994,7 +994,7 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& if (!solve_called) { rhs[alev] = linop.make(alev, 0, ng_rhs); } - rhs[alev].LocalCopy(*a_rhs[alev], 0, 0, ncomp, ng_rhs); + LocalCopy(rhs[alev], 
*a_rhs[alev], 0, 0, ncomp, ng_rhs); linop.applyMetricTerm(alev, 0, rhs[alev]); linop.unimposeNeumannBC(alev, rhs[alev]); linop.applyInhomogNeumannTerm(alev, rhs[alev]); @@ -1036,8 +1036,8 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& const int nmglevs = linop.NMGLevels(alev); for (int mglev = 0; mglev < nmglevs; ++mglev) { - res [alev][mglev].setVal(RT(0.0)); - rescor[alev][mglev].setVal(RT(0.0)); + setVal(res [alev][mglev], RT(0.0)); + setVal(rescor[alev][mglev], RT(0.0)); } } @@ -1054,7 +1054,7 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev,mglev)); } cor[alev][mglev] = linop.make(alev, mglev, _ng); } - cor[alev][mglev].setVal(RT(0.0)); + setVal(cor[alev][mglev], RT(0.0)); } } @@ -1070,7 +1070,7 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev,mglev)); } cor_hold[alev][mglev] = linop.make(alev, mglev, _ng); } - cor_hold[alev][mglev].setVal(RT(0.0)); + setVal(cor_hold[alev][mglev], RT(0.0)); } } for (int alev = 1; alev < finest_amr_lev; ++alev) @@ -1081,7 +1081,7 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev)); } cor_hold[alev][0] = linop.make(alev, 0, _ng); } - cor_hold[alev][0].setVal(RT(0.0)); + setVal(cor_hold[alev][0], RT(0.0)); } if (linop.m_parent // no embedded N-Solve @@ -1110,30 +1110,32 @@ template void MLMGT::prepareForNSolve () { - ns_linop = linop.makeNLinOp(nsolve_grid_size); + if constexpr (IsMultiFabLike_v) { + ns_linop = linop.makeNLinOp(nsolve_grid_size); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(); } - - const BoxArray& ba = (*ns_linop).m_grids[0][0]; - const DistributionMapping& dm =(*ns_linop).m_dmap[0][0]; - - int ng = 1; - if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; } - ns_sol = std::make_unique(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0))); - ng = 0; - if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; } - ns_rhs = std::make_unique(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0))); - ns_sol->setVal(RT(0.0)); - ns_rhs->setVal(RT(0.0)); - - ns_linop->setLevelBC(0, ns_sol.get()); - - ns_mlmg = std::make_unique>(*ns_linop); - ns_mlmg->setVerbose(0); - ns_mlmg->setFixedIter(1); - ns_mlmg->setMaxFmgIter(20); - ns_mlmg->setBottomSolver(BottomSolver::smoother); + int nghost = 0; + if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(); } + + const BoxArray& ba = (*ns_linop).m_grids[0][0]; + const DistributionMapping& dm =(*ns_linop).m_dmap[0][0]; + + int ng = 1; + if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; } + ns_sol = std::make_unique(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0))); + ng = 0; + if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; } + ns_rhs = std::make_unique(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0))); + setVal(*ns_sol, RT(0.0)); + setVal(*ns_rhs, RT(0.0)); + + ns_linop->setLevelBC(0, ns_sol.get()); + + ns_mlmg = std::make_unique>(*ns_linop); + ns_mlmg->setVerbose(0); + ns_mlmg->setFixedIter(1); + ns_mlmg->setMaxFmgIter(20); + ns_mlmg->setBottomSolver(BottomSolver::smoother); + } } // in : Residual (res) on the finest AMR level @@ -1149,7 +1151,7 @@ void MLMGT::oneIter (int iter) IntVect nghost(0); if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(alev)); } - sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, 
nghost); + LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost); // compute residual for the coarse AMR level computeResWithCrseSolFineCor(alev-1,alev); @@ -1175,7 +1177,7 @@ void MLMGT::oneIter (int iter) IntVect nghost(0); if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(0)); } - sol[0].LocalAdd(cor[0][0], 0, 0, ncomp, nghost); + LocalAdd(sol[0], cor[0][0], 0, 0, ncomp, nghost); } for (int alev = 1; alev <= finest_amr_lev; ++alev) @@ -1185,10 +1187,10 @@ void MLMGT::oneIter (int iter) IntVect nghost(0); if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(alev)); } - sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost); + LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost); if (alev != finest_amr_lev) { - cor_hold[alev][0].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost); + LocalAdd(cor_hold[alev][0], cor[alev][0], 0, 0, ncomp, nghost); } // Update fine AMR level correction @@ -1196,10 +1198,10 @@ void MLMGT::oneIter (int iter) miniCycle(alev); - sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost); + LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost); if (alev != finest_amr_lev) { - cor[alev][0].LocalAdd(cor_hold[alev][0], 0, 0, ncomp, nghost); + LocalAdd(cor[alev][0], cor_hold[alev][0], 0, 0, ncomp, nghost); } } @@ -1231,12 +1233,12 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - RT norm = res[amrlev][mglev].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(res[amrlev][mglev],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev].setVal(RT(0.0)); + setVal(cor[amrlev][mglev], RT(0.0)); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { linop.smooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev], skip_fillboundary); @@ -1248,7 +1250,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm after smooth " << norm << "\n"; } @@ -1262,7 +1264,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - RT norm = res[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " DN: Norm before bottom " << norm << "\n"; } @@ -1270,7 +1272,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - RT norm = rescor[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " UP: Norm after bottom " << norm << "\n"; } @@ -1279,11 +1281,11 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - RT norm = res[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev_bottom].setVal(RT(0.0)); + setVal(cor[amrlev][mglev_bottom], RT(0.0)); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { linop.smooth(amrlev, mglev_bottom, cor[amrlev][mglev_bottom], @@ -1293,7 +1295,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - RT 
norm = rescor[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm after smooth " << norm << "\n"; } @@ -1308,7 +1310,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm before smooth " << norm << "\n"; } @@ -1321,7 +1323,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0)); + RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0)); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm after smooth " << norm << "\n"; } @@ -1361,12 +1363,12 @@ MLMGT::mgFcycle () // rescor = res - L(cor) computeResOfCorrection(amrlev, mglev); // res = rescor; this provides b to the vcycle below - res[amrlev][mglev].LocalCopy(rescor[amrlev][mglev], 0, 0, ncomp, nghost); + LocalCopy(res[amrlev][mglev], rescor[amrlev][mglev], 0, 0, ncomp, nghost); // save cor; do v-cycle; add the saved to cor std::swap(cor[amrlev][mglev], cor_hold[amrlev][mglev]); mgVcycle(amrlev, mglev); - cor[amrlev][mglev].LocalAdd(cor_hold[amrlev][mglev], 0, 0, ncomp, nghost); + LocalAdd(cor[amrlev][mglev], cor_hold[amrlev][mglev], 0, 0, ncomp, nghost); } } @@ -1393,16 +1395,16 @@ MLMGT::NSolve (MLMGT& a_solver, MF& a_sol, MF& a_rhs) { BL_PROFILE("MLMG::NSolve()"); - a_sol.setVal(RT(0.0)); + setVal(a_sol, RT(0.0)); MF const& res_bottom = res[0].back(); - if (BoxArray::SameRefs(a_rhs.boxArray(),res_bottom.boxArray()) && - DistributionMapping::SameRefs(a_rhs.DistributionMap(),res_bottom.DistributionMap())) + if (BoxArray::SameRefs(boxArray(a_rhs),boxArray(res_bottom)) && + DistributionMapping::SameRefs(DistributionMap(a_rhs),DistributionMap(res_bottom))) { - a_rhs.LocalCopy(res_bottom, 0, 0, ncomp, IntVect(0)); + LocalCopy(a_rhs, res_bottom, 0, 0, ncomp, IntVect(0)); } else { - a_rhs.setVal(RT(0.0)); - a_rhs.ParallelCopy(res_bottom); + setVal(a_rhs, RT(0.0)); + ParallelCopy(a_rhs, res_bottom, 0, 0, ncomp); } a_solver.solve(Vector{&a_sol}, Vector{&a_rhs}, @@ -1428,7 +1430,7 @@ MLMGT::actualBottomSolve () auto& x = cor[amrlev][mglev]; auto& b = res[amrlev][mglev]; - x.setVal(RT(0.0)); + setVal(x, RT(0.0)); if (bottom_solver == BottomSolver::smoother) { @@ -1444,9 +1446,9 @@ MLMGT::actualBottomSolve () MF raii_b; if (linop.isBottomSingular() && linop.getEnforceSingularSolvable()) { - const IntVect ng = b.nGrowVect(); + const IntVect ng = nGrowVect(b); raii_b = linop.make(amrlev, mglev, ng); - raii_b.LocalCopy(b, 0, 0, ncomp, ng); + LocalCopy(raii_b, b, 0, 0, ncomp, ng); bottom_b = &raii_b; makeSolvable(amrlev,mglev,*bottom_b); @@ -1486,7 +1488,7 @@ MLMGT::actualBottomSolve () int ret = bottomSolveWithCG(x, *bottom_b, cg_type); // If the MLMG solve failed then set the correction to zero if (ret != 0) { - cor[amrlev][mglev].setVal(RT(0.0)); + setVal(cor[amrlev][mglev], RT(0.0)); if (bottom_solver == BottomSolver::cgbicg || bottom_solver == BottomSolver::bicgcg) { if (bottom_solver == BottomSolver::cgbicg) { @@ -1496,7 +1498,7 @@ MLMGT::actualBottomSolve () } ret = bottomSolveWithCG(x, *bottom_b, cg_type); if (ret != 0) { - cor[amrlev][mglev].setVal(RT(0.0)); + setVal(cor[amrlev][mglev], RT(0.0)); } else { // switch permanently 
if (cg_type == MLCGSolverT::Type::CG) { bottom_solver = BottomSolver::cg; @@ -1591,7 +1593,7 @@ MLMGT::computeResWithCrseSolFineCor (int calev, int falev) linop.solutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); - fine_res.LocalCopy(fine_rescor, 0, 0, ncomp, nghost); + LocalCopy(fine_res, fine_rescor, 0, 0, ncomp, nghost); linop.reflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); @@ -1619,7 +1621,7 @@ MLMGT::computeResWithCrseCorFineCor (int falev) // fine_rescor = fine_res - L(fine_cor) linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Inhomogeneous, &crse_cor); - fine_res.LocalCopy(fine_rescor, 0, 0, ncomp, nghost); + LocalCopy(fine_res, fine_rescor, 0, 0, ncomp, nghost); } // Interpolate correction from coarse to fine AMR level. @@ -1648,9 +1650,9 @@ MLMGT::interpCorrection (int alev) } MF cfine = linop.makeCoarseAmr(alev, IntVect(ng_dst)); - cfine.setVal(RT(0.0)); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, IntVect(ng_src), IntVect(ng_dst), - crse_geom.periodicity()); + setVal(cfine, RT(0.0)); + ParallelCopy(cfine, crse_cor, 0, 0, ncomp, IntVect(ng_src), IntVect(ng_dst), + crse_geom.periodicity()); linop.interpolationAmr(alev, fine_cor, cfine, nghost); // NOLINT(readability-suspicious-call-argument) } @@ -1689,7 +1691,7 @@ MLMGT::addInterpCorrection (int alev, int mglev) else { cfine = linop.makeCoarseMG(alev, mglev, IntVect(0)); - cfine.ParallelCopy(crse_cor,0,0,ncomp,IntVect(0),IntVect(0)); + ParallelCopy(cfine, crse_cor, 0, 0, ncomp); cmf = &cfine; } From a068330e6c66b5d9a7c6ca0e1c874f318e73f4cc Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 2 Jan 2024 07:42:38 -0800 Subject: [PATCH 076/186] Update CHANGES for 24.01 (#3686) --- CHANGES | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/CHANGES b/CHANGES index 82b28a03e87..cad5363b99e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,53 @@ +# 24.01 + + -- MLMG: Use free functions instead of MF member functions (#3681) + + -- Add a few free functions for MLMG (#3680) + + -- Eliminating Matrix operations in MLMG CG bottom solver if initial vector is zero (#3668) + + -- Add a for loop that is unrolled at compile time (#3674) + + -- Add PTD version of getParticleCell (#3675) + + -- Improve ParIter docs (#3676) + + -- Fix CI for ROCm 6.0 (#3673) + + -- PureSoA IdCpu fixes (#3671) + + -- CMake: AMReX_PARALLEL_LINK_JOBS (#3628) + + -- Clang-Tidy in CI: Keep Going after Errors (#3667) + + -- Delete empty below comments on classes and functions (#3669) + + -- Documentation for Profiling: Hot Spots and Load Balance (#3622) + + -- Fix warnings in SortParticlesForDeposition (#3664) + + -- Fix Resize Issue of Fab with the Async Arena (#3663) + + -- Fix SuperParticle `push_back` (#3661) + + -- Pure SoA Particle: Separate Array for IdCPU (#3585) + + -- Limit the scope of gpu_rand_generator (#3659) + + -- Fix a typo in doxygen for NonLocalBC::FillBoundary (#3658) + + -- GNU Make: Fix name collision for aurora (#3656) + + -- two separate fixes -- particle_compare and ref_ratio=1 (#3655) + + -- Clarify documentation on setEBDirchlet() and fix link to AMReX-Hydro (#3652) + + -- Robustify the Cache Cleanup Scripts (#3650) + + -- Disable CodeQL scheduled jobs on forks (#3649) + + -- Work around compiler bug in nvcc 12.2 by using functor instead of lambda (#3653) + # 23.12 -- solve_cg: avoid use of MF `z` (#3637) From 
f1ec8df75c562d2a4822cea84d284cf8e72c2e14 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 4 Jan 2024 08:58:46 -0800 Subject: [PATCH 077/186] Install Codeplay packages via APT (#3687) Codeplay now provides packages via APT. Token is no longer needed. --- .../dependencies/dependencies_codeplay.sh | 26 ++++++++++++++----- .../dependencies/dependencies_hip.sh | 6 +++-- .github/workflows/intel.yml | 22 +++++----------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/.github/workflows/dependencies/dependencies_codeplay.sh b/.github/workflows/dependencies/dependencies_codeplay.sh index dfd2e5a03f7..bf692d3c2c7 100755 --- a/.github/workflows/dependencies/dependencies_codeplay.sh +++ b/.github/workflows/dependencies/dependencies_codeplay.sh @@ -6,10 +6,24 @@ set -eu -o pipefail -curl -o oneapi_nvidia.sh -L "https://developer.codeplay.com/api/v1/products/download?product=oneapi&variant=nvidia&filters[]=linux&aat=$1" -chmod +x oneapi_nvidia.sh -sudo ./oneapi_nvidia.sh --yes +# `man apt.conf`: +# Number of retries to perform. If this is non-zero APT will retry +# failed files the given number of times. +echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries -curl -o oneapi_amd.sh -L "https://developer.codeplay.com/api/v1/products/download?product=oneapi&variant=amd&filters[]=linux&aat=$1" -chmod +x oneapi_amd.sh -sudo ./oneapi_amd.sh --yes +# https://developer.codeplay.com/apt/index.html +sudo wget -qO - https://developer.codeplay.com/apt/public.key | gpg --dearmor | sudo tee /usr/share/keyrings/codeplay-keyring.gpg > /dev/null +echo "deb [signed-by=/usr/share/keyrings/codeplay-keyring.gpg] https://developer.codeplay.com/apt all main" | sudo tee /etc/apt/sources.list.d/codeplay.list + +sudo apt-get update + +# try apt install up to five times, to avoid connection splits +status=1 +for itry in {1..5} +do + sudo apt-get install -y --no-install-recommends \ + $1 \ + && { sudo apt-get clean; status=0; break; } \ + || { sleep 10; } +done +if [[ ${status} -ne 0 ]]; then exit 1; fi diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh index 36df2f384ba..4673a7caed5 100755 --- a/.github/workflows/dependencies/dependencies_hip.sh +++ b/.github/workflows/dependencies/dependencies_hip.sh @@ -43,8 +43,10 @@ sudo apt-get install -y --no-install-recommends \ roctracer-dev \ rocprofiler-dev \ rocrand-dev \ - rocprim-dev \ - hiprand-dev + rocprim-dev + +# hiprand-dev is a new package that does not exist in old versions +sudo apt-get install -y --no-install-recommends hiprand-dev || true # activate # diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 6474214e0a2..aa1036ddf94 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -97,19 +97,16 @@ jobs: tests-oneapi-sycl-eb-nvidia: name: oneAPI SYCL for Nvidia GPUs [tests w/ EB] runs-on: ubuntu-latest - env: - CODEPLAYTOKEN: ${{ secrets.CODEPLAYTOKEN }} steps: - uses: actions/checkout@v3 - name: Dependencies - if: ${{ env.CODEPLAYTOKEN != '' }} run: | - .github/workflows/dependencies/dependencies_nvcc.sh + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_dpcpp.sh - .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }} + .github/workflows/dependencies/dependencies_nvcc.sh 12.0 + .github/workflows/dependencies/dependencies_codeplay.sh oneapi-nvidia-12.0 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - if: ${{ 
env.CODEPLAYTOKEN != '' }} uses: actions/cache@v3 with: path: ~/.cache/ccache @@ -117,9 +114,7 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - if: ${{ env.CODEPLAYTOKEN != '' }} - # clang currently supports CUDA up to version 11.5 and a warning is issued with newer versions - env: {CXXFLAGS: "-fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-unknown-cuda-version"} + env: {CXXFLAGS: "-fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 @@ -148,20 +143,16 @@ jobs: no-tests-oneapi-sycl-amd: name: oneAPI SYCL for AMD GPUs runs-on: ubuntu-20.04 - env: - CODEPLAYTOKEN: ${{ secrets.CODEPLAYTOKEN }} steps: - uses: actions/checkout@v3 - name: Dependencies - if: ${{ env.CODEPLAYTOKEN != '' }} run: | .github/workflows/dependencies/ubuntu_free_disk_space.sh - .github/workflows/dependencies/dependencies_hip.sh 5.4.6 .github/workflows/dependencies/dependencies_dpcpp.sh - .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }} + .github/workflows/dependencies/dependencies_hip.sh 5.4.3 + .github/workflows/dependencies/dependencies_codeplay.sh oneapi-amd-5.4.3 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - if: ${{ env.CODEPLAYTOKEN != '' }} uses: actions/cache@v3 with: path: ~/.cache/ccache @@ -169,7 +160,6 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - if: ${{ env.CODEPLAYTOKEN != '' }} env: {CXXFLAGS: "-fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx90a -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} run: | export CCACHE_COMPRESS=1 From e0b77e1fce491021696d22f1496138e8d9942d65 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 8 Jan 2024 09:18:49 -0800 Subject: [PATCH 078/186] Fix Advection_AmrCore test (#3690) The particle container should not be a static member, because it's a member of a class derived from AmrCore, not AmrLevel. The issue of its being a static member is that it is not deleted properly (unless we explicitly do it). Note that in the AmrLevel case we must make it a static member because all AmrLevel objects need to share the same particle container. 
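
The destruction-order issue described here is generic C++ rather than anything particle-specific. A minimal stand-alone sketch of it follows; the type and member names are illustrative only and are not the actual test code:

```cpp
#include <cstdio>
#include <memory>

struct Resource {
    ~Resource () { std::printf("Resource destroyed\n"); }
};

struct WithStaticMember {
    // A static member is destroyed only during static destruction at program
    // exit, i.e. after any framework finalization (such as amrex::Finalize())
    // has already run, unless the code remembers to reset() it explicitly.
    static std::unique_ptr<Resource> res;
};
std::unique_ptr<Resource> WithStaticMember::res;

struct WithRegularMember {
    // A regular member is destroyed together with its owning object, which the
    // application can (and normally does) destroy before finalization.
    std::unique_ptr<Resource> res;
};

int main () {
    WithStaticMember::res = std::make_unique<Resource>();
    {
        WithRegularMember obj;
        obj.res = std::make_unique<Resource>();
    } // obj.res is released here, safely before "finalization"
    std::printf("pretend amrex::Finalize() would run here\n");
    return 0; // WithStaticMember::res is released only now, too late
}
```
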
--- Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H | 6 +++--- Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H index fff3de9fea6..eee16a7ace5 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H @@ -87,7 +87,7 @@ public: amrex::Real EstTimeStep (int lev, amrex::Real time); #ifdef AMREX_PARTICLES - static amrex::AmrTracerParticleContainer* theTracerPC () { return TracerPC.get(); } + amrex::AmrTracerParticleContainer* theTracerPC () { return TracerPC.get(); } #endif @@ -217,8 +217,8 @@ private: #ifdef AMREX_PARTICLES void init_particles (); - static int do_tracers; - static std::unique_ptr TracerPC; + int do_tracers = 0; + std::unique_ptr TracerPC; #endif }; diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp index 48a5d96e37d..e79d8615c18 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp @@ -15,14 +15,6 @@ using namespace amrex; - - -#ifdef AMREX_PARTICLES -std::unique_ptr AmrCoreAdv::TracerPC = nullptr; -int AmrCoreAdv::do_tracers = 0; -#endif - - // constructor - reads in parameters from inputs file // - sizes multilevel arrays and data structures // - initializes BCRec boundary condition object From 656bb64515bda87807c97e82321cb998aa7bf16f Mon Sep 17 00:00:00 2001 From: AlexanderSinn <64009254+AlexanderSinn@users.noreply.github.com> Date: Tue, 9 Jan 2024 21:24:04 +0100 Subject: [PATCH 079/186] Align GpuComplex to its size (#3691) ## Summary As discussed in #3677, this PR makes the alignment of `amrex::GpuComplex` stricter to allow for coalesced memory accesses of arrays of GpuComplex by nvidia GPUs such as A100. Note that this may break `reinterpret_cast` from an array allocated as `std::complex` to `amrex::GpuComplex`, but not the other way around. ## Additional background Typical allocators (malloc, amrex CArena) give memory aligned to 16 bytes and CUDA allocators aligned to 256 bytes, which is sufficient for `amrex::GpuComplex`. ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_GpuComplex.H | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_GpuComplex.H b/Src/Base/AMReX_GpuComplex.H index 205788375f4..274da82604d 100644 --- a/Src/Base/AMReX_GpuComplex.H +++ b/Src/Base/AMReX_GpuComplex.H @@ -20,9 +20,12 @@ T norm (const GpuComplex& a_z) noexcept; * work in device code with Cuda yet. * * Should be bit-wise compatible with std::complex. + * + * GpuComplex is aligned to its size (stricter than std::complex) to allow for + * coalesced memory accesses with nvidia GPUs. */ template -struct GpuComplex +struct alignas(2*sizeof(T)) GpuComplex { using value_type = T; From e780f33fed28fbd9aee951e09c9aecf42e596a97 Mon Sep 17 00:00:00 2001 From: David Grote Date: Thu, 11 Jan 2024 12:16:08 -0800 Subject: [PATCH 080/186] Fix BL_PROFILE_TINY_FLUSH (#3695) ## Summary This fixes the BL_PROFILE_TINY_FLUSH macro. The `amrex` prefix was left off of the call to `TinyProfiler::MemoryFinalize`. 
This produced a compilation error that `TinyProfiler` was not defined. Note that this is in code that does not have `using namespace amrex`. ## Additional background ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_BLProfiler.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_BLProfiler.H b/Src/Base/AMReX_BLProfiler.H index 3b72a627881..b7f8e6a1c27 100644 --- a/Src/Base/AMReX_BLProfiler.H +++ b/Src/Base/AMReX_BLProfiler.H @@ -514,7 +514,7 @@ inline std::string BLProfiler::CommStats::CFTToString(CommFuncType cft) { #define BL_PROFILE_REGION_VAR(fname, rvname) #define BL_PROFILE_REGION_VAR_START(fname, rvname) #define BL_PROFILE_REGION_VAR_STOP(fname, rvname) -#define BL_PROFILE_TINY_FLUSH() amrex::TinyProfiler::Finalize(true); TinyProfiler::MemoryFinalize(true) +#define BL_PROFILE_TINY_FLUSH() amrex::TinyProfiler::Finalize(true); amrex::TinyProfiler::MemoryFinalize(true) #define BL_PROFILE_FLUSH() #define BL_TRACE_PROFILE_FLUSH() #define BL_TRACE_PROFILE_SETFLUSHSIZE(fsize) From 8692f601b428c90725423eb2c5693e12ccc4f457 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 11 Jan 2024 21:52:24 -0800 Subject: [PATCH 081/186] Add special named flag for invalid particles (#3688) This lets you do `p.m_idcpu = LongParticleIDs::InvalidParticleID;` instead of `amrex::ParticleIDWrapper{p.m_idcpu} = -1;` The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Axel Huebl --- Src/Particle/AMReX_Particle.H | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 4ae8b7c4365..818a5dbdf3e 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -15,7 +15,7 @@ namespace amrex { namespace { - /** Used for 64bit Long particle Ids as in AoS layout */ + /** Special flags used for 64-bit Long particle Ids */ namespace LongParticleIds { constexpr Long GhostParticleID = 549755813887L; // 2**39-1 constexpr Long VirtualParticleID = GhostParticleID - 1; @@ -25,6 +25,15 @@ namespace } using namespace LongParticleIds; + + /** Flags used to set the entire uint64_t idcpu + to special values at once. + */ + namespace ParticleIdCpus { + constexpr std::uint64_t Invalid = 16777216; // corresponds to id = -1, cpu = 0 + } + + using namespace ParticleIdCpus; } struct ParticleIDWrapper From d44001045c8074b620fcf176bda5ba35f08267f9 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 12 Jan 2024 14:48:13 -0800 Subject: [PATCH 082/186] GMRES (#3648) Add a GMRES solver class template This is still work in progress. So the interface may not be stable yet. 
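
As a quick orientation before the diff, the intended usage pattern, condensed from the ABecLaplacian_C test added below, looks roughly like the sketch here; the free-function wrapper and the tolerance values are placeholders, not part of the patch:

```cpp
#include <AMReX_GMRES.H>
#include <AMReX_GMRES_MLMG.H>
#include <AMReX_MLMG.H>
#include <AMReX_MLABecLaplacian.H>
#include <AMReX_MultiFab.H>

// Solve L(cor) = res with GMRES, wrapping an already configured
// MLABecLaplacian/MLMG pair as the linear operator M.
void gmres_correction (amrex::MLABecLaplacian& mlabec,
                       amrex::MultiFab& cor, amrex::MultiFab const& res,
                       int verbose)
{
    amrex::MLMG mlmg(mlabec);

    amrex::GMRESMLMG mat(mlmg); // adapts MLMG to the operator interface GMRES expects
    mat.usePrecond(true);       // preconditioning is currently a few smoother sweeps

    amrex::GMRES<amrex::MultiFab, amrex::GMRESMLMG> gmres;
    gmres.setVerbose(verbose);
    gmres.define(mat);          // must be called before solve()
    gmres.solve(cor, res, amrex::Real(1.e-6), amrex::Real(0.0)); // rel. and abs. tolerances
}
```

Note that the `GMRESMLMG` wrapper below implements `precond()` as a handful of `smooth()` sweeps for now, so `usePrecond(true)` is cheap but is not a full multigrid V-cycle.
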
--- Src/Base/AMReX_TableData.H | 21 +- Src/LinearSolvers/AMReX_GMRES.H | 406 ++++++++++++++++++ Src/LinearSolvers/AMReX_GMRES_MLMG.H | 147 +++++++ Src/LinearSolvers/CMakeLists.txt | 3 + Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 3 + Src/LinearSolvers/MLMG/AMReX_MLMG.H | 31 +- Src/LinearSolvers/MLMG/Make.package | 4 + Src/LinearSolvers/Make.package | 9 + Src/LinearSolvers/OpenBC/Make.package | 4 + .../LinearSolvers/ABecLaplacian_C/GNUmakefile | 2 +- Tests/LinearSolvers/ABecLaplacian_C/MyTest.H | 4 + .../LinearSolvers/ABecLaplacian_C/MyTest.cpp | 91 +++- .../ABecLaplacian_C/MyTestPlotfile.cpp | 3 +- .../ABecLaplacian_C/inputs.gmres | 17 + 14 files changed, 716 insertions(+), 29 deletions(-) create mode 100644 Src/LinearSolvers/AMReX_GMRES.H create mode 100644 Src/LinearSolvers/AMReX_GMRES_MLMG.H create mode 100644 Src/LinearSolvers/Make.package create mode 100644 Tests/LinearSolvers/ABecLaplacian_C/inputs.gmres diff --git a/Src/Base/AMReX_TableData.H b/Src/Base/AMReX_TableData.H index 842225e53f4..9d0db5bd8d1 100644 --- a/Src/Base/AMReX_TableData.H +++ b/Src/Base/AMReX_TableData.H @@ -22,8 +22,7 @@ struct Table1D int begin = 1; int end = 0; - AMREX_GPU_HOST_DEVICE - constexpr Table1D () noexcept {} + constexpr Table1D () noexcept = default; template ::value,int>::type = 0> AMREX_GPU_HOST_DEVICE @@ -81,8 +80,7 @@ struct Table2D GpuArray begin{{1,1}}; GpuArray end{{0,0}}; - AMREX_GPU_HOST_DEVICE - constexpr Table2D () noexcept {} + constexpr Table2D () noexcept = default; template ::value,int>::type = 0> AMREX_GPU_HOST_DEVICE @@ -147,8 +145,7 @@ struct Table3D GpuArray begin{{1,1,1}}; GpuArray end{{0,0,0}}; - AMREX_GPU_HOST_DEVICE - constexpr Table3D () noexcept {} + constexpr Table3D () noexcept = default; template ::value,int>::type = 0> AMREX_GPU_HOST_DEVICE @@ -219,8 +216,7 @@ struct Table4D GpuArray begin{{1,1,1,1}}; GpuArray end{{0,0,0,0}}; - AMREX_GPU_HOST_DEVICE - constexpr Table4D () noexcept {} + constexpr Table4D () noexcept = default; template ::value,int>::type = 0> AMREX_GPU_HOST_DEVICE @@ -337,7 +333,7 @@ public: std::conditional_t, Table4D > > >; - TableData () noexcept; + TableData () noexcept = default; explicit TableData (Arena* ar) noexcept; @@ -351,11 +347,11 @@ public: ~TableData () noexcept; - constexpr int dim () const noexcept { return N; } + [[nodiscard]] constexpr int dim () const noexcept { return N; } void resize (Array const& lo, Array const& hi, Arena* ar = nullptr); - Long size () const noexcept; + [[nodiscard]] Long size () const noexcept; Array const& lo () const noexcept { return m_lo; } @@ -380,9 +376,6 @@ private: bool m_ptr_owner = false; }; -template -TableData::TableData () noexcept {} - template TableData::TableData (Array const& lo, Array const& hi, Arena* ar) : DataAllocator{ar}, m_lo(lo), m_hi(hi) diff --git a/Src/LinearSolvers/AMReX_GMRES.H b/Src/LinearSolvers/AMReX_GMRES.H new file mode 100644 index 00000000000..fd729c9e45d --- /dev/null +++ b/Src/LinearSolvers/AMReX_GMRES.H @@ -0,0 +1,406 @@ +#ifndef AMREX_GMRES_H_ +#define AMREX_GMRES_H_ +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace amrex { + +/** + * \brief GMRES + * + * This class implements the GMRES algorithm. The template parameter V is + * for a linear algebra vector class. For example, it could be + * amrex::MultiFab. The other template parameter M is for a linear operator + * class with a number of required member functions. Note that conceptually + * M contains a matrix. 
However, it does not mean it needs to have a member + * variable storing the matrix, because GMRES only needs the matrix vector + * product, not the matrix itself. + * + * \tparam V linear algebra vector. It must be default constructible, move + * constructible, and move assignable. + * \tparam M linear operator. A list of required member functions for M is + * shown below. Here RT (typename V::value_type) is either double + * or float. + * - void apply(V& lhs, V const& rhs)\n + * lhs = L(rhs), where L is the linear operator performing matrix + * vector product. + * - void assign(V& lhs, V const& rhs)\n + * lhs = rhs. + * - RT dotProduct(V const& v1, V const& v2)\n + * returns v1 * v2. + * - void increment(V& lhs, V const& rhs, RT a)\n + * lhs += a * rhs. + * - void linComb(V& lhs, RT a, V const& rhs_a, RT b, V const& rhs_b)\n + * lhs = a * rhs_a + b * rhs_b. + * - V makeVecRHS()\n + * returns a V object that is suitable as RHS in M x = b. The reason + * we distinguish between LHS and RHS is M might need the distinction + * for efficiency. For example, if V is MultiFab, we might need the x + * in the LHS of M x = b to have ghost cells for efficiency, whereas + * no ghost cells are needed for the RHS (i.e., b). + * - V makeVecLHS()\n + * returns a V object that is suitable as LHS in M x = b. See the + * description for makeVecRHS for more details. + * - RT norm2(V const& v)\n + * returns the 2-norm of v. + * - void precond(V& lhs, V const& rhs)\n + * applies preconditioner to rhs. If there is no preconditioner, + * this function should do lhs = rhs. + * - void setVal(V& v, RT value)\n + * v = value. + */ +template +class GMRES +{ +public: + + using RT = typename V::value_type; // double or float + + GMRES (); + + //! Defines with a reference to M. It's the user's responsibility to + //! keep the M object alive for GMRES to be functional. This function + //! must be called before solve() can be called. + void define (M& linop); + + /** + * \brief Solve the linear system + * + * \param a_sol unknowns, i.e., x in A x = b. + * \param a_rhs RHS, i.e., b in A x = b. + * \param a_tol_rel relative tolerance. + * \param a_tol_abs absolute tolerance. + * \patam a_its optional argument specifying the maximum number of iterations. + */ + void solve (V& a_sol, V const& a_rhs, RT a_tol_rel, RT a_tol_abs, int a_its=-1); + + //! Sets verbosity. + void setVerbose (int v) { m_verbose = v; } + + //! Sets restart length. The default is 30. + void setRestartLength (int rl); + + //! Gets the number of iterations. + [[nodiscard]] int getNumIters () const { return m_its; } + + //! Gets the solver status. + [[nodiscard]] int getStatus () const { return m_status; } + + //! Gets the 2-norm of the residual. 
+ [[nodiscard]] RT getResidualNorm () const { return m_res; } + +private: + void clear (); + void allocate_scratch (); + void cycle (V& a_xx, int& a_status, int& a_itcount, RT& a_rnorm0); + void build_solution (V& a_xx, int it); + void compute_residual (V& a_rr, V const& a_xx, V const& a_bb); + + bool converged (RT r0, RT r) const; + + void gram_schmidt_orthogonalization (int it); + void update_hessenberg (int it, bool happyend, RT& res); + + int m_verbose = 0; + int m_maxiter = 2000; + int m_its = 0; + int m_status = -1; + int m_restrtlen = 30; + RT m_res = std::numeric_limits::max(); + RT m_rtol = RT(0); + RT m_atol = RT(0); + Vector m_hh_1d; + Vector m_hes_1d; + Table2D m_hh; + Table2D m_hes; + Vector m_grs; + Vector m_cc; + Vector m_ss; + std::unique_ptr m_v_tmp_rhs; + std::unique_ptr m_v_tmp_lhs; + Vector m_vv; + M* m_linop = nullptr; +}; + +template +GMRES::GMRES () +{ + allocate_scratch(); +} + +template +void GMRES::allocate_scratch () +{ + int rs = m_restrtlen; + + m_hh_1d.resize(std::size_t(rs + 2) * (rs + 1)); + m_hh = Table2D(m_hh_1d.data(), {0,0}, {rs+1,rs}); // (0:rs+1,0:rs) + + m_hes_1d.resize(std::size_t(rs + 2) * (rs + 1)); + m_hes = Table2D(m_hes_1d.data(), {0,0}, {rs+1,rs}); // (0:rs+1,0:rs) + + m_grs.resize(rs + 2); + m_cc.resize(rs + 1); + m_ss.resize(rs + 1); +} + +template +void GMRES::setRestartLength (int rl) +{ + if (m_restrtlen != rl) { + m_restrtlen = rl; + allocate_scratch(); + m_vv.clear(); + } +} + +template +void GMRES::define (M& linop) +{ + clear(); + m_linop = &linop; +} + +template +void GMRES::clear () +{ + m_its = 0; + m_status = -1; + m_res = std::numeric_limits::max(); + m_v_tmp_rhs.reset(); + m_v_tmp_lhs.reset(); + m_vv.clear(); + m_linop = nullptr; +} + +template +bool GMRES::converged (RT r0, RT r) const +{ + return (r < r0*m_rtol) || (r < m_atol); +} + +template +void GMRES::solve (V& a_sol, V const& a_rhs, RT a_tol_rel, RT a_tol_abs, int a_its) +{ + BL_PROFILE("GMRES::solve()"); + + AMREX_ALWAYS_ASSERT(m_linop != nullptr); + + auto t0 = amrex::second(); + + if (m_v_tmp_rhs == nullptr) { + m_v_tmp_rhs = std::make_unique(m_linop->makeVecRHS()); + } + if (m_v_tmp_lhs == nullptr) { + m_v_tmp_lhs = std::make_unique(m_linop->makeVecLHS()); + } + if (m_vv.empty()) { + m_vv.resize(m_restrtlen+1); + for (auto& v : m_vv) { + v = m_linop->makeVecRHS(); + } + } + + m_rtol = a_tol_rel; + m_atol = a_tol_abs; + + if (a_its < 0) { a_its = m_maxiter; } + + auto rnorm0 = RT(0); + + m_linop->assign(m_vv[0], a_rhs); + m_linop->setVal(a_sol, RT(0.0)); + + m_its = 0; + m_status = -1; + cycle(a_sol, m_status, m_its, rnorm0); + + while (m_status == -1 && m_its < a_its) { + compute_residual(m_vv[0], a_sol, a_rhs); + cycle(a_sol, m_status, m_its, rnorm0); + } + + if (m_status == -1 && m_its >= a_its) { m_status = 1; } + + m_v_tmp_rhs.reset(); + m_v_tmp_lhs.reset(); + m_vv.clear(); + + auto t1 = amrex::second(); + if (m_verbose > 0) { + amrex::Print() << "GMRES: Solve Time = " << t1-t0 << std::endl; + } +} + +template +void GMRES::cycle (V& a_xx, int& a_status, int& a_itcount, RT& a_rnorm0) +{ + BL_PROFILE("GMREA::cycle()"); + + m_res = m_linop->norm2(m_vv[0]); + m_grs[0] = m_res; + + if (m_res == RT(0.0)) { + a_status = 0; + return; + } + + m_linop->scale(m_vv[0], RT(1.0)/m_res); + + if (a_itcount == 0) { a_rnorm0 = m_res; } + + a_status = converged(a_rnorm0,m_res) ? 
0 : -1; + + int it = 0; + while (it < m_restrtlen && a_itcount < m_maxiter) + { + if (m_verbose > 1) { + amrex::Print() << "GMRES: iter = " << a_itcount + << ", residual = " << m_res << ", " << m_res/a_rnorm0 + << " (rel.)\n"; + } + + if (a_status == 0) { break; } + + auto const& vv_it = m_vv[it ]; + auto & vv_it1 = m_vv[it+1]; + + m_linop->precond(*m_v_tmp_lhs, vv_it); + m_linop->apply(vv_it1, *m_v_tmp_lhs); + + gram_schmidt_orthogonalization(it); + + auto tt = m_linop->norm2(vv_it1); + + auto const small = RT((sizeof(RT) == 8) ? 1.e-99 : 1.e-30); + bool happyend = (tt < small); + if (!happyend) { + m_linop->scale(vv_it1, RT(1.0)/tt); + } + + m_hh (it+1,it) = tt; + m_hes(it+1,it) = tt; + + update_hessenberg(it, happyend, m_res); + + ++it; + ++a_itcount; + a_status = converged(a_rnorm0, m_res) ? 0 : -1; + if (happyend) { break; } + } + + if ((m_verbose > 1) && (a_status != 0 || a_itcount >= m_maxiter)) { + amrex::Print() << "GMRES: iter = " << a_itcount + << ", residual = " << m_res << ", " << m_res/a_rnorm0 + << " (rel.)\n"; + } + + build_solution(a_xx, it-1); +} + +template +void GMRES::gram_schmidt_orthogonalization (int const it) +{ + // Two unmodified Gram-Schmidt Orthogonalization + + BL_PROFILE("GMRES::GramSchmidt"); + + auto& vv_1 = m_vv[it+1]; + + Vector lhh(it+1); + + for (int j = 0; j <= it; ++j) { + m_hh (j,it) = RT(0.0); + m_hes(j,it) = RT(0.0); + } + + for (int ncnt = 0; ncnt < 2 ; ++ncnt) + { + for (int j = 0; j <= it; ++j) { + lhh[j] = m_linop->dotProduct(vv_1, m_vv[j]); + } + + for (int j = 0; j <= it; ++j) { + m_linop->increment(vv_1, m_vv[j], -lhh[j]); + m_hh (j,it) += lhh[j]; + m_hes(j,it) -= lhh[j]; + } + } +} + +template +void GMRES::update_hessenberg (int it, bool happyend, RT& res) +{ + BL_PROFILE("GMRES::update_hessenberg()"); + + for (int j = 1; j <= it; ++j) { + auto tt = m_hh(j-1,it); + m_hh(j-1,it) = m_cc[j-1] * tt + m_ss[j-1] * m_hh(j,it); + m_hh(j ,it) = m_cc[j-1] * m_hh(j,it) - m_ss[j-1] * tt; + } + + if (!happyend) + { + auto tt = std::sqrt(m_hh(it,it)*m_hh(it,it) + m_hh(it+1,it)*m_hh(it+1,it)); + m_cc[it] = m_hh(it ,it) / tt; + m_ss[it] = m_hh(it+1,it) / tt; + m_grs[it+1] = - (m_ss[it] * m_grs[it]); + m_grs[it ] = m_cc[it] * m_grs[it]; + m_hh(it,it) = m_cc[it] * m_hh(it,it) + m_ss[it] * m_hh(it+1,it); + res = std::abs(m_grs[it+1]); + } + else + { + res = RT(0.0); + } +} + +template +void GMRES::build_solution (V& a_xx, int const it) +{ + BL_PROFILE("GMRES:build_solution()"); + + if (it < 0) { return; } + + if (m_hh(it,it) != RT(0.0)) { + m_grs[it] /= m_hh(it,it); + } else { + m_grs[it] = RT(0.0); + } + + for (int ii = 1; ii <= it; ++ii) { + int k = it - ii; + auto tt = m_grs[k]; + for (int j = k+1; j <= it; ++j) { + tt -= m_hh(k,j) * m_grs[j]; + } + m_grs[k] = tt / m_hh(k,k); + } + + m_linop->setVal(*m_v_tmp_rhs, RT(0.0)); + for (int ii = 0; ii < it+1; ++ii) { + m_linop->increment(*m_v_tmp_rhs, m_vv[ii], m_grs[ii]); + } + + m_linop->precond(*m_v_tmp_lhs, *m_v_tmp_rhs); + m_linop->increment(a_xx, *m_v_tmp_lhs, RT(1.0)); +} + +template +void GMRES::compute_residual (V& a_rr, V const& a_xx, V const& a_bb) +{ + BL_PROFILE("GMRES::compute_residual()"); + m_linop->assign(*m_v_tmp_lhs, a_xx); + m_linop->apply(*m_v_tmp_rhs, *m_v_tmp_lhs); + m_linop->linComb(a_rr, RT(1.0), a_bb, RT(-1.0), *m_v_tmp_rhs); +} + +} +#endif diff --git a/Src/LinearSolvers/AMReX_GMRES_MLMG.H b/Src/LinearSolvers/AMReX_GMRES_MLMG.H new file mode 100644 index 00000000000..5106afde378 --- /dev/null +++ b/Src/LinearSolvers/AMReX_GMRES_MLMG.H @@ -0,0 +1,147 @@ +#ifndef AMREX_GMRES_MLMG_H_ 
+#define AMREX_GMRES_MLMG_H_ +#include + +#include +#include + +namespace amrex { + +//! Wrapping MLMG as a matrix operator for GMRES +template +class GMRESMLMGT +{ +public: + using MF = typename M::MFType; // typically MultiFab + using RT = typename MF::value_type; // double or float + + explicit GMRESMLMGT (M& mlmg); + + //! Make MultiFab without ghost cells + MF makeVecRHS () const; + + //! Make MultiFab with ghost cells and set ghost cells to zero + MF makeVecLHS () const; + + RT norm2 (MF const& mf) const; + + static void scale (MF& mf, RT scale_factor); + + RT dotProduct (MF const& mf1, MF const& mf2) const; + + //! lhs = value + static void setVal (MF& lhs, RT value); + + //! lhs = rhs + static void assign (MF& lhs, MF const& rhs); + + //! lhs += a*rhs + static void increment (MF& lhs, MF const& rhs, RT a); + + //! lhs = a*rhs_a + b*rhs_b + static void linComb (MF& lhs, RT a, MF const& rhs_a, RT b, MF const& rhs_b); + + //! lhs = L(rhs) + void apply (MF& lhs, MF const& rhs) const; + + void precond (MF& lhs, MF const& rhs) const; + + bool usePrecond (bool new_flag) { return std::exchange(m_use_precond, new_flag); } + +private: + M& m_mlmg; + MLLinOpT& m_linop; + bool m_use_precond = false; +}; + +template +GMRESMLMGT::GMRESMLMGT (M& mlmg) + : m_mlmg(mlmg), m_linop(mlmg.getLinOp()) +{ + m_mlmg.prepareLinOp(); +} + +template +auto GMRESMLMGT::makeVecRHS () const -> MF +{ + return m_linop.make(0, 0, IntVect(0)); +} + +template +auto GMRESMLMGT::makeVecLHS () const -> MF +{ + auto mf = m_linop.make(0, 0, IntVect(1)); + mf.setBndry(0); + return mf; +} + +template +auto GMRESMLMGT::norm2 (MF const& mf) const -> RT +{ + auto r = m_linop.xdoty(0, 0, mf, mf, false); + return std::sqrt(r); +} + +template +void GMRESMLMGT::scale (MF& mf, RT scale_factor) +{ + mf.mult(scale_factor, 0, mf.nComp()); +} + +template +auto GMRESMLMGT::dotProduct (MF const& mf1, MF const& mf2) const -> RT +{ + return m_linop.xdoty(0, 0, mf1, mf2, false); +} + +template +void GMRESMLMGT::setVal (MF& lhs, RT value) +{ + lhs.setVal(value); +} + +template +void GMRESMLMGT::assign (MF& lhs, MF const& rhs) +{ + MF::Copy(lhs, rhs, 0, 0, lhs.nComp(), IntVect(0)); +} + +template +void GMRESMLMGT::increment (MF& lhs, MF const& rhs, RT a) +{ + MF::Saxpy(lhs, a, rhs, 0, 0, lhs.nComp(), IntVect(0)); +} + +template +void GMRESMLMGT::linComb (MF& lhs, RT a, MF const& rhs_a, RT b, MF const& rhs_b) +{ + MF::LinComb(lhs, a, rhs_a, 0, b, rhs_b, 0, 0, lhs.nComp(), IntVect(0)); +} + +template +void GMRESMLMGT::apply (MF& lhs, MF const& rhs) const +{ + m_linop.apply(0, 0, lhs, const_cast(rhs), + MLLinOpT::BCMode::Homogeneous, + MLLinOpT::StateMode::Correction); +} + +template +void GMRESMLMGT::precond (MF& lhs, MF const& rhs) const +{ + if (m_use_precond) { + // for now, let's just do some smoothing + lhs.setVal(RT(0.0)); + for (int m = 0; m < 4; ++m) { + m_linop.smooth(0, 0, lhs, rhs, (m==0) ? 
true : false); + } + } else { + amrex::Copy(lhs, rhs, 0, 0, lhs.nComp(), IntVect(0)); + } +} + +using GMRESMLMG = GMRESMLMGT; + +} + +#endif diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index c2851d49959..76f75e06123 100644 --- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -2,6 +2,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) # # Sources in subdirectory MLMG # + target_include_directories(amrex_${D}d PUBLIC $) target_include_directories(amrex_${D}d PUBLIC $) target_sources(amrex_${D}d @@ -53,6 +54,8 @@ foreach(D IN LISTS AMReX_SPACEDIM) MLMG/AMReX_MLNodeABecLaplacian.cpp MLMG/AMReX_MLNodeABecLap_K.H MLMG/AMReX_MLNodeABecLap_${D}D_K.H + AMReX_GMRES.H + AMReX_GMRES_MLMG.H ) if (D EQUAL 3) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index f0dca07f3ab..5fc8de10022 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -98,6 +98,7 @@ template class MLMGT; template class MLCGSolverT; template class MLPoissonT; template class MLABecLaplacianT; +template class GMRESMLMGT; template class MLLinOpT @@ -108,7 +109,9 @@ public: template friend class MLCGSolverT; template friend class MLPoissonT; template friend class MLABecLaplacianT; + template friend class GMRESMLMGT; + using MFType = MF; using FAB = typename LinOpData::fab_type; using RT = typename LinOpData::value_type; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 9bfc2f0007d..46069b7a26b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -21,6 +21,7 @@ public: template friend class MLCGSolverT; + using MFType = MF; using FAB = typename MLLinOpT::FAB; using RT = typename MLLinOpT::RT; @@ -167,6 +168,8 @@ public: void prepareForNSolve (); + void prepareLinOp (); + void oneIter (int iter); void miniCycle (int amrlev); @@ -217,6 +220,8 @@ public: [[nodiscard]] int getNumIters () const noexcept { return m_iter_fine_resnorm0.size(); } [[nodiscard]] Vector const& getNumCGIters () const noexcept { return m_niters_cg; } + MLLinOpT& getLinOp () { return linop; } + private: bool throw_exception = false; @@ -794,12 +799,7 @@ MLMGT::compResidual (const Vector& a_res, const Vector& a_sol, } } - if (!linop_prepared) { - linop.prepareForSolve(); - linop_prepared = true; - } else if (linop.needsUpdate()) { - linop.update(); - } + prepareLinOp(); const auto& amrrr = linop.AMRRefRatio(); @@ -880,12 +880,7 @@ MLMGT::apply (const Vector& out, const Vector& a_in) setVal(rh[alev], RT(0.0)); } - if (!linop_prepared) { - linop.prepareForSolve(); - linop_prepared = true; - } else if (linop.needsUpdate()) { - linop.update(); - } + prepareLinOp(); for (int alev = 0; alev < namrlevs; ++alev) { linop.applyInhomogNeumannTerm(alev, rh[alev]); @@ -1106,6 +1101,18 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& } } +template +void +MLMGT::prepareLinOp () +{ + if (!linop_prepared) { + linop.prepareForSolve(); + linop_prepared = true; + } else if (linop.needsUpdate()) { + linop.update(); + } +} + template void MLMGT::prepareForNSolve () diff --git a/Src/LinearSolvers/MLMG/Make.package b/Src/LinearSolvers/MLMG/Make.package index d66d64ec0eb..769ccad4d26 100644 --- a/Src/LinearSolvers/MLMG/Make.package +++ b/Src/LinearSolvers/MLMG/Make.package @@ -1,3 +1,5 @@ +ifndef AMREX_MLMG_MAKE + AMREX_MLMG_MAKE := 1 CEXE_sources += AMReX_MLMG.cpp @@ -87,3 +89,5 @@ endif VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/MLMG 
INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/MLMG + +endif diff --git a/Src/LinearSolvers/Make.package b/Src/LinearSolvers/Make.package new file mode 100644 index 00000000000..c818714cbfd --- /dev/null +++ b/Src/LinearSolvers/Make.package @@ -0,0 +1,9 @@ +CEXE_headers += AMReX_GMRES.H AMReX_GMRES_MLMG.H + +VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers + +include $(AMREX_HOME)/Src/LinearSolvers/MLMG/Make.package +ifeq ($(DIM),3) + include $(AMREX_HOME)/Src/LinearSolvers/OpenBC/Make.package +endif diff --git a/Src/LinearSolvers/OpenBC/Make.package b/Src/LinearSolvers/OpenBC/Make.package index 5fc39f69371..b2f83ed61e8 100644 --- a/Src/LinearSolvers/OpenBC/Make.package +++ b/Src/LinearSolvers/OpenBC/Make.package @@ -1,6 +1,10 @@ +ifndef AMREX_OPENBC_MAKE + AMREX_OPENBC_MAKE := 1 CEXE_headers += AMReX_OpenBC.H AMReX_OpenBC_K.H CEXE_sources += AMReX_OpenBC.cpp VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC + +endif diff --git a/Tests/LinearSolvers/ABecLaplacian_C/GNUmakefile b/Tests/LinearSolvers/ABecLaplacian_C/GNUmakefile index 329fb9afcf2..e26200bd94f 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/GNUmakefile +++ b/Tests/LinearSolvers/ABecLaplacian_C/GNUmakefile @@ -16,7 +16,7 @@ include $(AMREX_HOME)/Tools/GNUMake/Make.defs include ./Make.package -Pdirs := Base Boundary LinearSolvers/MLMG +Pdirs := Base Boundary LinearSolvers Ppack += $(foreach dir, $(Pdirs), $(AMREX_HOME)/Src/$(dir)/Make.package) diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H index c1ed7ba4c3d..97c928a88da 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H @@ -30,6 +30,7 @@ private: void solveABecLaplacian (); void solveABecLaplacianInhomNeumann (); void solveNodeABecLaplacian (); + void solveABecLaplacianGMRES (); int max_level = 1; int ref_ratio = 2; @@ -56,6 +57,9 @@ private: bool use_hypre = false; bool use_petsc = false; + // GMRES + bool use_gmres = false; + #ifdef AMREX_USE_HYPRE int hypre_interface_i = 1; // 1. structed, 2. semi-structed, 3. 
ij amrex::Hypre::Interface hypre_interface = amrex::Hypre::Interface::structed; diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp index 9900ce43d7f..84fe99cb9cb 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp @@ -1,5 +1,7 @@ #include "MyTest.H" +#include +#include #include #include #include @@ -20,7 +22,11 @@ MyTest::solve () if (prob_type == 1) { solvePoisson(); } else if (prob_type == 2) { - solveABecLaplacian(); + if (use_gmres) { + solveABecLaplacianGMRES(); + } else { + solveABecLaplacian(); + } } else if (prob_type == 3) { solveABecLaplacianInhomNeumann(); } else if (prob_type == 4) { @@ -460,6 +466,86 @@ MyTest::solveNodeABecLaplacian () } } +void +MyTest::solveABecLaplacianGMRES () +{ + LPInfo info; + info.setMaxCoarseningLevel(0); + + const auto tol_rel = Real(1.e-6); + const auto tol_abs = Real(0.0); + + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(composite_solve == false, + "solveABecLaplacianGMRES does not support composite solve"); + + const auto nlevels = static_cast(geom.size()); + + for (int ilev = 0; ilev < nlevels; ++ilev) + { + MLABecLaplacian mlabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info); + + mlabec.setMaxOrder(linop_maxorder); + + // This is a 3d problem with homogeneous Neumann BC + mlabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Neumann, + LinOpBCType::Neumann, + LinOpBCType::Neumann)}, + {AMREX_D_DECL(LinOpBCType::Neumann, + LinOpBCType::Neumann, + LinOpBCType::Neumann)}); + + if (ilev > 0) { + mlabec.setCoarseFineBC(&solution[ilev-1], ref_ratio); + } + + // for problem with pure homogeneous Neumann BC, we could pass a nullptr + mlabec.setLevelBC(0, nullptr); + + mlabec.setScalars(ascalar, bscalar); + + mlabec.setACoeffs(0, acoef[ilev]); + + Array face_bcoef; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) + { + const BoxArray& ba = amrex::convert(bcoef[ilev].boxArray(), + IntVect::TheDimensionVector(idim)); + face_bcoef[idim].define(ba, bcoef[ilev].DistributionMap(), 1, 0); + } + amrex::average_cellcenter_to_face(GetArrOfPtrs(face_bcoef), + bcoef[ilev], geom[ilev]); + mlabec.setBCoeffs(0, amrex::GetArrOfConstPtrs(face_bcoef)); + + MultiFab res(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0); + + MLMG mlmg(mlabec); + mlmg.setVerbose(verbose); + mlmg.apply({&res}, {&solution[ilev]}); // res = L(sol) + + MultiFab::Subtract(res, rhs[ilev], 0, 0, 1, 0); // now res = L(sol) - rhs + + MultiFab cor(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0); + + using M = GMRESMLMG; + M mat(mlmg); + mat.usePrecond(true); + + GMRES gmres; + gmres.setVerbose(verbose); + gmres.define(mat); + gmres.solve(cor, res, tol_rel, tol_abs); // solve L(cor) = res + + MultiFab::Subtract(solution[ilev], cor, 0, 0, 1, 0); + + mlmg.apply({&res}, {&solution[ilev]}); // res = L(sol) + MultiFab::Subtract(res, rhs[ilev], 0, 0, 1, 0); // now res = L(sol) - rhs + if (verbose) { + amrex::Print() << "Final residual = " << res.norminf(0) + << " " << res.norm1(0) << " " << res.norm2(0) << std::endl; + } + } +} + void MyTest::readParameters () { @@ -484,6 +570,9 @@ MyTest::readParameters () pp.query("max_coarsening_level", max_coarsening_level); pp.query("max_semicoarsening_level", max_semicoarsening_level); + pp.query("use_gmres", use_gmres); + AMREX_ALWAYS_ASSERT(use_gmres == false || prob_type == 2); + #ifdef AMREX_USE_HYPRE pp.query("use_hypre", use_hypre); pp.query("hypre_interface", hypre_interface_i); diff --git 
a/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp index 4473f978a85..6dbb2c55b59 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp @@ -77,7 +77,8 @@ MyTest::writePlotfile () const Real dvol = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); amrex::Print() << "Level " << ilev << " max-norm error: " << plotmf[ilev].norminf(3) - << " 1-norm error: " << plotmf[ilev].norm1(3)*dvol << std::endl; + << " 1-norm error: " << plotmf[ilev].norm1(3)*dvol + << " 2-norm error: " << plotmf[ilev].norm2(3)*dvol << std::endl; } WriteMultiLevelPlotfile("plot", nlevels, amrex::GetVecOfConstPtrs(plotmf), diff --git a/Tests/LinearSolvers/ABecLaplacian_C/inputs.gmres b/Tests/LinearSolvers/ABecLaplacian_C/inputs.gmres new file mode 100644 index 00000000000..17e7767dffa --- /dev/null +++ b/Tests/LinearSolvers/ABecLaplacian_C/inputs.gmres @@ -0,0 +1,17 @@ + +max_level = 0 +ref_ratio = 2 +n_cell = 128 +max_grid_size = 64 + +composite_solve = 0 # composite solve or level by level? + +prob_type = 2 + +use_gmres = 1 + +verbose = 2 + +amrex.fpe_trap_invalid = 1 +amrex.fpe_trap_zero = 1 +amrex.fpe_trap_overflow = 1 From 255d30f387cf2c1a7eff5a31f703c94de803e8d8 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 12 Jan 2024 14:48:35 -0800 Subject: [PATCH 083/186] BaseFab::lockAdd: Faster version of BaseFab::atomicAdd for OpenMP (#3696) For WarpX's Gordon Bell runs on Fugaku, Stephan Jaure of ATOS optimized atomicAdd using pthread spin locks. This commit implements Stephan's approach using OpenMP. In AMReX::Initialize, we create a number of OMP locks. When BaseFab::lockAdd is called, we loop over planes in z-direction and try to acquire a lock with omp_test_lock. If it's successful, we can access the data in that z-plane without worrying about race conditions. This allows us to use simd instructions instead of omp atomic adds. If it's not successful, we will try a different z-plane. The process stops till all planes are processed. --- Src/Base/AMReX.cpp | 6 ++- Src/Base/AMReX_BaseFab.H | 111 ++++++++++++++++++++++++++++++++++++-- Src/Base/AMReX_OpenMP.H | 9 +++- Src/Base/AMReX_OpenMP.cpp | 32 ++++++++++- 4 files changed, 150 insertions(+), 8 deletions(-) diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp index 4449dab1955..90d2a4ee8cd 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -462,7 +462,7 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, #endif #ifdef AMREX_USE_OMP - amrex::OpenMP::init_threads(); + amrex::OpenMP::Initialize(); // status output if (system::verbose > 0) { @@ -817,6 +817,10 @@ amrex::Finalize (amrex::AMReX* pamrex) Gpu::Device::Finalize(); #endif +#ifdef AMREX_USE_OMP + amrex::OpenMP::Finalize(); +#endif + #if defined(AMREX_USE_UPCXX) upcxx::finalize(); #endif diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index eb8e5c59615..c4820bbe923 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -20,10 +20,8 @@ #include #include #include - -#ifdef AMREX_USE_OMP -#include -#endif +#include +#include #include #include @@ -979,6 +977,19 @@ public: BaseFab& atomicAdd (const BaseFab& src, const Box& srcbox, const Box& destbox, int srccomp, int destcomp, int numcomp=1) noexcept; + /** + * \brief Atomically add srcbox region of src FAB to destbox region of this FAB. + * The srcbox and destbox must be same size. When OMP is on, this uses OMP locks + * in the implementation and it's usually faster than atomicAdd. 
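
The try-lock idea is easier to see outside of BaseFab. Below is a stripped-down, stand-alone sketch using plain arrays and a made-up function name; it is not the actual lockAdd code, just the plane-by-plane locking pattern it describes:

```cpp
#include <omp.h>
#include <cstddef>
#include <vector>

// Each z-plane is guarded by an OpenMP lock. A thread that fails to acquire a
// plane's lock does not block; it moves on and retries that plane later, so
// the inner loop can use simd adds instead of per-element atomic adds.
void planewise_add (double* dst, const double* src,
                    int plane_size, int nplanes,
                    std::vector<omp_lock_t>& locks) // pre-initialized, one per plane
{
    std::vector<char> done(nplanes, 0);
    int planes_left = nplanes;
    int m = 0;
    while (planes_left > 0) {
        if (!done[m] && omp_test_lock(&locks[m])) {
            double*       d = dst + std::size_t(m) * plane_size;
            const double* s = src + std::size_t(m) * plane_size;
            #pragma omp simd
            for (int i = 0; i < plane_size; ++i) { d[i] += s[i]; }
            done[m] = 1;
            --planes_left;
            omp_unset_lock(&locks[m]);
        }
        m = (m + 1) % nplanes; // try another plane next
    }
}
```

In the patch itself the locks come from a fixed global pool (OpenMP::omp_locks, with nlocks = 128) indexed by the plane index modulo the pool size, rather than one lock per plane.
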
+ */ +#if defined(AMREX_USE_GPU) + template +#else + template +#endif + BaseFab& lockAdd (const BaseFab& src, const Box& srcbox, const Box& destbox, + int srccomp, int destcomp, int numcomp) noexcept; + //! FAB SAXPY (y[i] <- y[i] + a * x[i]), in place. #if defined(AMREX_USE_GPU) template @@ -3300,6 +3311,98 @@ BaseFab::atomicAdd (const BaseFab& src, const Box& srcbox, const Box& dest return *this; } +template +template +BaseFab& +BaseFab::lockAdd (const BaseFab& src, const Box& srcbox, const Box& destbox, + int srccomp, int destcomp, int numcomp) noexcept +{ +#if defined(AMREX_USE_OMP) && (AMREX_SPACEDIM > 1) +#if defined(AMREX_USE_GPU) + if (run_on == RunOn::Host || Gpu::notInLaunchRegion()) { +#endif + BL_ASSERT(destbox.ok()); + BL_ASSERT(src.box().contains(srcbox)); + BL_ASSERT(box().contains(destbox)); + BL_ASSERT(destbox.sameSize(srcbox)); + BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp()); + BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp()); + + Array4 const& d = this->array(); + Array4 const& s = src.const_array(); + auto const& dlo = destbox.smallEnd(); +#if (AMREX_SPACEDIM == 3) + auto const& dhi = destbox.bigEnd(); +#endif + auto const& slo = srcbox.smallEnd(); + auto const offset = slo - dlo; + auto const lenx = srcbox.length(0); + + auto const nplanes = srcbox.length(AMREX_SPACEDIM-1); + auto* mask = (bool*) amrex_mempool_alloc(sizeof(bool)*nplanes); + for (int ip = 0; ip < nplanes; ++ip) { + mask[ip] = false; + } + + int mm = 0; + int planes_left = nplanes; + while (planes_left > 0) { + AMREX_ASSERT(mm < nplanes); + auto const m = mm + dlo[AMREX_SPACEDIM-1]; + int ilock = m % OpenMP::nlocks; + if (ilock < 0) { ilock += OpenMP::nlocks; } + auto* lock = &(OpenMP::omp_locks[ilock]); + if (omp_test_lock(lock)) + { + for (int n = 0; n < numcomp; ++n) + { +#if (AMREX_SPACEDIM == 3) + for (int j = dlo[1]; j <= dhi[1]; ++j) + { + IntVect div(dlo[0], j, m); +#elif (AMREX_SPACEDIM == 2) + { + IntVect div(dlo[0], m); +#endif + auto * pdst = d.ptr(div ,n+destcomp); + auto const* psrc = s.ptr(div+offset,n+srccomp); +#pragma omp simd + for (int ii = 0; ii < lenx; ++ii) { + pdst[ii] += psrc[ii]; + } + } + } + + mask[mm] = true; + --planes_left; + omp_unset_lock(lock); + if (planes_left == 0) { break; } + } + + ++mm; + for (int ip = 0; ip < nplanes; ++ip) { + int new_mm = (mm+ip) % nplanes; + if ( ! 
mask[new_mm] ) { + mm = new_mm; + break; + } + } + } + + amrex_mempool_free(mask); + + return *this; + +#if defined(AMREX_USE_GPU) + } else { + return this->template atomicAdd(src, srcbox, destbox, srccomp, destcomp, numcomp); + } +#endif +#else + return this->template atomicAdd(src, srcbox, destbox, srccomp, destcomp, numcomp); +#endif +} + template template BaseFab& diff --git a/Src/Base/AMReX_OpenMP.H b/Src/Base/AMReX_OpenMP.H index ce267b9be73..2b53ea8c6ec 100644 --- a/Src/Base/AMReX_OpenMP.H +++ b/Src/Base/AMReX_OpenMP.H @@ -3,7 +3,9 @@ #include #ifdef AMREX_USE_OMP +#include #include +#include namespace amrex::OpenMP { @@ -13,7 +15,11 @@ namespace amrex::OpenMP { inline int in_parallel () { return omp_in_parallel(); } inline void set_num_threads (int num) { omp_set_num_threads(num); } - void init_threads (); + void Initialize (); + void Finalize (); + + static constexpr int nlocks = 128; + extern AMREX_EXPORT std::array omp_locks; } #else // AMREX_USE_OMP @@ -25,7 +31,6 @@ namespace amrex::OpenMP { constexpr int get_thread_num () { return 0; } constexpr int in_parallel () { return false; } constexpr void set_num_threads (int) { /* nothing */ } - constexpr void init_threads () { /* nothing */ } } #endif // AMREX_USE_OMP diff --git a/Src/Base/AMReX_OpenMP.cpp b/Src/Base/AMReX_OpenMP.cpp index c0c33ce962f..15bb1246071 100644 --- a/Src/Base/AMReX_OpenMP.cpp +++ b/Src/Base/AMReX_OpenMP.cpp @@ -135,8 +135,19 @@ namespace amrex #ifdef AMREX_USE_OMP namespace amrex::OpenMP { - void init_threads () + std::array omp_locks; + + namespace { + unsigned int initialized = 0; + } + + void Initialize () { + if (initialized) { + ++initialized; + return; + } + amrex::ParmParse pp("amrex"); std::string omp_threads = "system"; pp.queryAdd("omp_threads", omp_threads); @@ -173,6 +184,25 @@ namespace amrex::OpenMP } } } + + for (auto& lck : omp_locks) { + omp_init_lock(&lck); + } + + ++initialized; } + + void Finalize () + { + if (initialized) { + --initialized; + if (initialized == 0) { + for (auto& lck : omp_locks) { + omp_destroy_lock(&lck); + } + } + } + } + } // namespace amrex::OpenMP #endif // AMREX_USE_OMP From b96b731d4a6fa496f80e54c2c0476a50b9dd4e86 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 15 Jan 2024 13:11:26 -0500 Subject: [PATCH 084/186] use amrex::Gpu::memcpy for packParticleIDs (#3699) std::memcpy does not exist in HIP, so we use amrex::Gpu::memcpy instead for packParticleIDs when writing binary particle data. 
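
For reference, the device-safe bit-pattern copy boils down to the following small sketch; the free function and the umbrella include are illustrative, while the real code lives inside packParticleIDs in the header changed below:

```cpp
#include <AMReX_Gpu.H>
#include <cstdint>

// Split a packed 64-bit idcpu into two signed 32-bit ints without calling
// std::memcpy, which is not available in HIP device code.
AMREX_GPU_HOST_DEVICE
void split_idcpu (std::uint64_t idcpu, int& hi, int& lo)
{
    auto xu = static_cast<std::uint32_t>((idcpu & 0xFFFFFFFF00000000ULL) >> 32);
    auto yu = static_cast<std::uint32_t>( idcpu & 0xFFFFFFFFULL);
    amrex::Gpu::memcpy(&hi, &xu, sizeof(xu)); // reinterpret the bits as int
    amrex::Gpu::memcpy(&lo, &yu, sizeof(yu));
}
```
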
--- Src/Particle/AMReX_WriteBinaryParticleData.H | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index d3cafc5be58..105a254a348 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -157,8 +157,8 @@ void packParticleIDs (I* idata, const P& p, bool is_checkpoint) noexcept std::uint32_t xu, yu; xu = (std::uint32_t)((p.m_idcpu & 0xFFFFFFFF00000000LL) >> 32); yu = (std::uint32_t)( p.m_idcpu & 0xFFFFFFFFLL); - std::memcpy(&xi, &xu, sizeof(xu)); - std::memcpy(&yi, &yu, sizeof(yu)); + amrex::Gpu::memcpy(&xi, &xu, sizeof(xu)); + amrex::Gpu::memcpy(&yi, &yu, sizeof(yu)); idata[0] = xi; idata[1] = yi; } else { From 03f59365ab29fc619d836a5f0fd3e9249eeacd89 Mon Sep 17 00:00:00 2001 From: Luca Fedeli Date: Tue, 16 Jan 2024 06:00:31 +0100 Subject: [PATCH 085/186] Add a linspace-like function in AMReX_Algorithm.H (#3698) ## Summary I would like to propose to add a linspace-like function in AMReX_Algorithm.H , since filling a container with equally spaced numbers is a task that occurs rather frequently. This PR proposes a possible implementation. The container is modified in place only if it has at least 2 elements. ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [X] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_Algorithm.H | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index 31889df4425..98f5f7d5a38 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -209,6 +209,24 @@ namespace amrex )) } + template::value_type>::value && + std::is_floating_point::value, + int>::type = 0> + AMREX_GPU_HOST_DEVICE + void linspace (ItType first, const ItType& last, const ValType& start, const ValType& stop) + { + const std::ptrdiff_t count = last-first; + if (count >= 2){ + const auto delta = (stop - start)/(count - 1); + for (std::ptrdiff_t i = 0; i < count-1; ++i){ + *(first++) = start + i*delta; + } + *first = stop; + } + } + namespace detail { struct clzll_tag {}; From 9b799efb04e06912be7c051861cd60a0d7b82579 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Tue, 16 Jan 2024 16:44:51 -0800 Subject: [PATCH 086/186] Add macro for loop unrolling across compilers (#3701) --- Src/Base/AMReX_Extension.H | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Src/Base/AMReX_Extension.H b/Src/Base/AMReX_Extension.H index cc299c0823e..c02adcdff7e 100644 --- a/Src/Base/AMReX_Extension.H +++ b/Src/Base/AMReX_Extension.H @@ -133,6 +133,18 @@ #define AMREX_NO_INLINE #endif +// unroll loop +#define AMREX_TO_STRING_HELPER(X) #X +#define AMREX_TO_STRING(X) AMREX_TO_STRING_HELPER(X) + +#if defined(__clang__) || defined(__CUDACC__) || defined(__HIP__) || defined(__INTEL_CLANG_COMPILER) +#define AMREX_UNROLL_LOOP(n) _Pragma(AMREX_TO_STRING(unroll (n))) +#elif defined(__GNUC__) +#define AMREX_UNROLL_LOOP(n) _Pragma(AMREX_TO_STRING(GCC unroll (n))) +#else +#define AMREX_UNROLL_LOOP(n) +#endif + // __attribute__((weak)) #if defined(AMREX_TYPECHECK) From 53a581cc802734846f1c102e593bddb828922cf0 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Fri, 19 Jan 2024 12:43:59 -0500 Subject: 
From 53a581cc802734846f1c102e593bddb828922cf0 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Fri, 19 Jan 2024 12:43:59 -0500 Subject: [PATCH 087/186] add a dependabot.yml to checkout github actions versions (#3704) --- .github/dependabot.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..b4f368c6680 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +# Dependabot configuration +# ref: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From cf287fe1c176a0c04bef3f9e3fe59defd1b093ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:12:58 -0800 Subject: [PATCH 088/186] Bump JamesIves/github-pages-deploy-action from 3.7.1 to 4.5.0 (#3705) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [JamesIves/github-pages-deploy-action](https://github.com/jamesives/github-pages-deploy-action) from 3.7.1 to 4.5.0.
Release notes (sourced from JamesIves/github-pages-deploy-action's releases):

v4.5.0
  Full Changelog: https://github.com/JamesIves/github-pages-deploy-action/compare/v4.4.3...v4.5.0

v4.4.3
  ... (truncated)

Commits
  • 65b5dfd Deploy Production Code for Commit 32bb2a8d7bda92f28b1fd223fc5793ab27f725eb 🚀
  • 32bb2a8 Merge branch 'dev' into releases/v4
  • 7879c16 feat: update to Node 20
  • 1c34508 build(deps-dev): bump @types/node from 20.9.0 to 20.10.0 (#1479)
  • b957d05 Bump actions/checkout from 3 to 4 (#1429)
  • 3a4632d build(deps-dev): bump @types/node from 20.4.9 to 20.9.0 (#1474)
  • 9de0269 build(deps): bump actions/setup-node from 3 to 4 (#1467)
  • 810af53 Update README.md
  • 24c1e2a doc: .gitignore matches not being deployed (#1459)
  • 0769273 fix: remove base.Dockerfile as it's not needed
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=JamesIves/github-pages-deploy-action&package-manager=github_actions&previous-version=3.7.1&new-version=4.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@dependabot rebase` will rebase this PR
  • `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@dependabot merge` will merge this PR after your CI passes on it
  • `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@dependabot reopen` will reopen this PR if it is closed
  • `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 82e387cbff4..147769cf3b7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,7 +27,7 @@ jobs: - name: Deploy if: github.event_name == 'push' && github.repository == 'AMReX-Codes/amrex' && github.ref == 'refs/heads/development' - uses: JamesIves/github-pages-deploy-action@3.7.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ACCESS_TOKEN: ${{ secrets.DEPLOY_DOCS }} From 03bf4cf0485e280074a94f3574ac3f916f19e667 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:14:52 -0800 Subject: [PATCH 089/186] Bump actions/cache from 3 to 4 (#3706) --- .github/workflows/apps.yml | 4 ++-- .github/workflows/bittree.yml | 4 ++-- .github/workflows/clang.yml | 6 +++--- .github/workflows/codeql.yml | 2 +- .github/workflows/cuda.yml | 8 ++++---- .github/workflows/gcc.yml | 26 +++++++++++++------------- .github/workflows/hip.yml | 8 ++++---- .github/workflows/hypre.yml | 6 +++--- .github/workflows/intel.yml | 10 +++++----- .github/workflows/macos.yml | 4 ++-- .github/workflows/petsc.yml | 2 +- .github/workflows/smoke.yml | 2 +- .github/workflows/sundials.yml | 4 ++-- 13 files changed, 43 insertions(+), 43 deletions(-) diff --git a/.github/workflows/apps.yml b/.github/workflows/apps.yml index 45afcab4d77..90635d1d772 100644 --- a/.github/workflows/apps.yml +++ b/.github/workflows/apps.yml @@ -35,7 +35,7 @@ jobs: .github/workflows/dependencies/dependencies.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -77,7 +77,7 @@ jobs: .github/workflows/dependencies/dependencies.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/bittree.yml b/.github/workflows/bittree.yml index 687bf07c00d..e487c90076b 100644 --- a/.github/workflows/bittree.yml +++ b/.github/workflows/bittree.yml @@ -18,7 +18,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 15 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -70,7 +70,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 15 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index a343832b510..cd12df004f6 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -22,7 +22,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: 
ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -82,7 +82,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 14 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -138,7 +138,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 14 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index f240930bd8b..44754b636f9 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -41,7 +41,7 @@ jobs: - name: Set Up Cache if: ${{ matrix.language == 'cpp' }} - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 3fe50a5a02d..06da80f01df 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -18,7 +18,7 @@ jobs: .github/workflows/dependencies/dependencies_nvcc.sh 11.2 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -77,7 +77,7 @@ jobs: .github/workflows/dependencies/dependencies_llvm_cuda11_clang15.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -121,7 +121,7 @@ jobs: .github/workflows/dependencies/dependencies_nvhpc.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -181,7 +181,7 @@ jobs: .github/workflows/dependencies/dependencies_nvcc.sh 11.2 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index aca7a9c8723..2def95251ce 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -23,7 +23,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -79,7 +79,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -127,7 +127,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: 
~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -175,7 +175,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -225,7 +225,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -284,7 +284,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -341,7 +341,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 14 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -405,7 +405,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -445,7 +445,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 15 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -485,7 +485,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -525,7 +525,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -565,7 +565,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -604,7 +604,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 12 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index 0c0e59a96c4..628e6940fc3 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -17,7 +17,7 @@ jobs: .github/workflows/dependencies/dependencies_hip.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up 
Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -81,7 +81,7 @@ jobs: .github/workflows/dependencies/dependencies_hip.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -144,7 +144,7 @@ jobs: .github/workflows/dependencies/dependencies_hip.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -175,7 +175,7 @@ jobs: .github/workflows/dependencies/dependencies_hip.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/hypre.yml b/.github/workflows/hypre.yml index 871224fc79c..c998fe0de63 100644 --- a/.github/workflows/hypre.yml +++ b/.github/workflows/hypre.yml @@ -20,7 +20,7 @@ jobs: sudo apt-get install -y libcublas-dev-11-2 libcusparse-dev-11-2 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: | ~/.cache/ccache @@ -69,7 +69,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 14 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -118,7 +118,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 14 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index aa1036ddf94..1cd44e516c8 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -17,7 +17,7 @@ jobs: .github/workflows/dependencies/dependencies_dpcpp.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -61,7 +61,7 @@ jobs: .github/workflows/dependencies/dependencies_dpcpp.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -107,7 +107,7 @@ jobs: .github/workflows/dependencies/dependencies_codeplay.sh oneapi-nvidia-12.0 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -153,7 +153,7 @@ jobs: .github/workflows/dependencies/dependencies_codeplay.sh oneapi-amd-5.4.3 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ 
github.sha }} @@ -199,7 +199,7 @@ jobs: sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 1c405c2d66d..b2877f109be 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -16,7 +16,7 @@ jobs: - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /Users/runner/Library/Caches/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -57,7 +57,7 @@ jobs: - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /Users/runner/Library/Caches/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/petsc.yml b/.github/workflows/petsc.yml index eaddf1c2489..ba5845e32e0 100644 --- a/.github/workflows/petsc.yml +++ b/.github/workflows/petsc.yml @@ -18,7 +18,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 14 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index d907b485261..47e1736f5dd 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -18,7 +18,7 @@ jobs: .github/workflows/dependencies/dependencies_clang-tidy.sh 15 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} diff --git a/.github/workflows/sundials.yml b/.github/workflows/sundials.yml index a890e10fad3..2649bec0b78 100644 --- a/.github/workflows/sundials.yml +++ b/.github/workflows/sundials.yml @@ -21,7 +21,7 @@ jobs: .github/workflows/dependencies/dependencies.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} @@ -81,7 +81,7 @@ jobs: .github/workflows/dependencies/dependencies_nvcc.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/ccache key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} From abc1cfb268a8b3dd011e6d431963054a7eb55037 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 14:00:57 -0800 Subject: [PATCH 090/186] Bump actions/checkout from 3 to 4 (#3707) --- .github/workflows/apps.yml | 10 ++++---- .github/workflows/ascent.yml | 2 +- .github/workflows/clang.yml | 6 ++--- .github/workflows/cleanup-cache-postpr.yml | 2 +- .github/workflows/cleanup-cache.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/codespell.yml | 2 +- .github/workflows/cuda.yml | 8 +++---- .github/workflows/docs.yml | 2 +- .github/workflows/gcc.yml | 28 +++++++++++----------- 
.github/workflows/hip.yml | 8 +++---- .github/workflows/hypre.yml | 6 ++--- .github/workflows/intel.yml | 10 ++++---- .github/workflows/macos.yml | 4 ++-- .github/workflows/petsc.yml | 2 +- .github/workflows/sensei.yml | 2 +- .github/workflows/smoke.yml | 2 +- .github/workflows/style.yml | 6 ++--- .github/workflows/sundials.yml | 4 ++-- .github/workflows/windows.yml | 6 ++--- 20 files changed, 57 insertions(+), 57 deletions(-) diff --git a/.github/workflows/apps.yml b/.github/workflows/apps.yml index 90635d1d772..15805162571 100644 --- a/.github/workflows/apps.yml +++ b/.github/workflows/apps.yml @@ -11,7 +11,7 @@ jobs: name: Castro runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get Latest Release Tag run: | CASTRO_TAG=$(wget https://github.com/AMReX-Astro/Castro/releases/latest 2>&1 | grep Location: | awk '{print $2}' | awk -F/ '{print $NF}') @@ -19,13 +19,13 @@ jobs: MICROPHYSICS_TAG=$(wget https://github.com/AMReX-Astro/Microphysics/releases/latest 2>&1 | grep Location: | awk '{print $2}' | awk -F/ '{print $NF}') echo "MICROPHYSICS_TAG=$MICROPHYSICS_TAG" >> $GITHUB_ENV - name: Download Castro - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: 'AMReX-Astro/Castro' ref: ${{env.CASTRO_TAG}} path: 'Castro' - name: Download Microphysics - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: 'AMReX-Astro/Microphysics' ref: ${{env.MICROPHYSICS_TAG}} @@ -61,13 +61,13 @@ jobs: name: WarpX runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get Latest Release Tag run: | WARPX_TAG=$(wget https://github.com/ECP-WarpX/WarpX/releases/latest 2>&1 | grep Location: | awk '{print $2}' | awk -F/ '{print $NF}') echo "WARPX_TAG=$WARPX_TAG" >> $GITHUB_ENV - name: Download WarpX - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: 'ECP-WarpX/WarpX' ref: ${{env.WARPX_TAG}} diff --git a/.github/workflows/ascent.yml b/.github/workflows/ascent.yml index d8217621a1b..6c36970eda8 100644 --- a/.github/workflows/ascent.yml +++ b/.github/workflows/ascent.yml @@ -18,7 +18,7 @@ jobs: container: image: alpinedav/ascent:0.9.2 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Configure run: | . 
/ascent_docker_setup_env.sh diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index cd12df004f6..d6ba9c72e32 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -15,7 +15,7 @@ jobs: name: Clang@7.0 C++17 SP NOMPI Debug [lib] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_clang.sh 7 @@ -75,7 +75,7 @@ jobs: name: Clang@14.0 C++17 SP Particles DP Mesh Debug [tests] runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_clang.sh 14 @@ -131,7 +131,7 @@ jobs: name: Clang NOMPI Release [configure 2D] runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_clang.sh 14 diff --git a/.github/workflows/cleanup-cache-postpr.yml b/.github/workflows/cleanup-cache-postpr.yml index 978e9c28f04..9a2ffb0f61a 100644 --- a/.github/workflows/cleanup-cache-postpr.yml +++ b/.github/workflows/cleanup-cache-postpr.yml @@ -16,7 +16,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Clean up ccache run: | gh extension install actions/gh-actions-cache diff --git a/.github/workflows/cleanup-cache.yml b/.github/workflows/cleanup-cache.yml index cafae38bf1c..d18acbaa788 100644 --- a/.github/workflows/cleanup-cache.yml +++ b/.github/workflows/cleanup-cache.yml @@ -16,7 +16,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Clean up ccache run: | gh extension install actions/gh-actions-cache diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 44754b636f9..030d8f5cef7 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -29,7 +29,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Packages (C++) if: ${{ matrix.language == 'cpp' }} diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 491b839c00e..bec24c5c7ca 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install codespell run: | diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 06da80f01df..b0821a2ed03 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -12,7 +12,7 @@ jobs: name: CUDA@11.2 GNU@9.3.0 C++17 Release [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_nvcc.sh 11.2 @@ -71,7 +71,7 @@ jobs: CUDAARCHS: "70" AMReX_CUDA_ARCH: "7.0" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_llvm_cuda11_clang15.sh @@ -114,7 +114,7 @@ jobs: name: NVHPC NVCC/NVC++ C++17 Release [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/ubuntu_free_disk_space.sh @@ -175,7 +175,7 @@ jobs: name: CUDA@11.2 GNU@9.3.0 [configure 3D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: 
Dependencies run: | .github/workflows/dependencies/dependencies_nvcc.sh 11.2 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 147769cf3b7..e4452b9f6f9 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 2def95251ce..83d840b95c7 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -16,7 +16,7 @@ jobs: name: GNU@8.4 C++17 Release [lib] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_gcc.sh 8 @@ -72,7 +72,7 @@ jobs: name: GNU@9.3 C++17 3D Debug Fortran [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -120,7 +120,7 @@ jobs: name: GNU@9.3 C++17 2D Debug Fortran [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -168,7 +168,7 @@ jobs: name: GNU@9.3 C++17 1D Debug Fortran [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -218,7 +218,7 @@ jobs: name: GNU@10.1 C++20 OMP [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_gcc.sh 10 @@ -277,7 +277,7 @@ jobs: name: GNU@8.4 C++17 NOMPI [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_gcc.sh 8 @@ -334,7 +334,7 @@ jobs: name: GNU@12 C++17 w/o Fortran [tests] runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_gcc.sh 12 @@ -398,7 +398,7 @@ jobs: name: GNU@9.3 Release [configure 1D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -438,7 +438,7 @@ jobs: name: GNU@11.2 Release [configure 3D] runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -478,7 +478,7 @@ jobs: name: GNU@9.3 Release [configure 3D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -518,7 +518,7 @@ jobs: name: GNU@9.3 OMP Debug [configure 3D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -558,7 +558,7 @@ jobs: name: GNU Plotfile Tools [tools] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -597,7 +597,7 @@ jobs: name: GNU@9.3 C++17 [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | 
.github/workflows/dependencies/dependencies.sh @@ -649,7 +649,7 @@ jobs: CXX: h5pcc CC: h5cc steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index 628e6940fc3..07b815a00eb 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -11,7 +11,7 @@ jobs: name: HIP ROCm Flang C++17 [tests] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_hip.sh @@ -75,7 +75,7 @@ jobs: name: HIP ROCm GFortran@9.3 C++17 [tests-hipcc] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_hip.sh @@ -138,7 +138,7 @@ jobs: name: HIP EB [configure 2D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_hip.sh @@ -169,7 +169,7 @@ jobs: name: HIP EB 3D GMake runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_hip.sh diff --git a/.github/workflows/hypre.yml b/.github/workflows/hypre.yml index c998fe0de63..e9b86fa9167 100644 --- a/.github/workflows/hypre.yml +++ b/.github/workflows/hypre.yml @@ -13,7 +13,7 @@ jobs: env: AMREX_HYPRE_HOME: ${HOME}/.cache/hypre-2.26.0-cuda steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_nvcc.sh 11.2 @@ -62,7 +62,7 @@ jobs: name: GCC 3D Hypre@2.21.0 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -111,7 +111,7 @@ jobs: name: GCC EB 2D Hypre@2.28.0 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 1cd44e516c8..ee1b7503188 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -11,7 +11,7 @@ jobs: name: oneAPI SYCL [tests] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_dpcpp.sh @@ -55,7 +55,7 @@ jobs: name: oneAPI SYCL [tests w/ EB] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_dpcpp.sh @@ -98,7 +98,7 @@ jobs: name: oneAPI SYCL for Nvidia GPUs [tests w/ EB] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/ubuntu_free_disk_space.sh @@ -144,7 +144,7 @@ jobs: name: oneAPI SYCL for AMD GPUs runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/ubuntu_free_disk_space.sh @@ -191,7 +191,7 @@ jobs: name: ICC [tests] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/ubuntu_free_disk_space.sh diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 
b2877f109be..5917c7bd074 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -12,7 +12,7 @@ jobs: name: AppleClang Universal w/o MPI [tests-universal] runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Set Up Cache @@ -53,7 +53,7 @@ jobs: name: AppleClang@11.0 GFortran@9.3 [tests] runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Set Up Cache diff --git a/.github/workflows/petsc.yml b/.github/workflows/petsc.yml index ba5845e32e0..0d42d7984c6 100644 --- a/.github/workflows/petsc.yml +++ b/.github/workflows/petsc.yml @@ -11,7 +11,7 @@ jobs: name: GCC 2D EB PETSc@3.18.1 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/sensei.yml b/.github/workflows/sensei.yml index fc5a0db3059..711c22588cf 100644 --- a/.github/workflows/sensei.yml +++ b/.github/workflows/sensei.yml @@ -21,7 +21,7 @@ jobs: container: image: senseiinsitu/ci:fedora35-amrex-20220613 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup run: mkdir build - name: Configure diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 47e1736f5dd..2d9b5fbe457 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -11,7 +11,7 @@ jobs: name: GNU Multi-D runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 8cd87d66e87..00a86b00ac4 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -10,21 +10,21 @@ jobs: tabs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tabs run: .github/workflows/style/check_tabs.sh trailing_whitespaces: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Trailing Whitespaces run: .github/workflows/style/check_trailing_whitespaces.sh doxygen: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Doxygen run: | sudo apt-get install -y --no-install-recommends doxygen diff --git a/.github/workflows/sundials.yml b/.github/workflows/sundials.yml index 2649bec0b78..43c04da1cec 100644 --- a/.github/workflows/sundials.yml +++ b/.github/workflows/sundials.yml @@ -15,7 +15,7 @@ jobs: CCACHE_COMPRESSLEVEL: 10 CCACHE_MAXSIZE: 20M steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh @@ -75,7 +75,7 @@ jobs: CCACHE_COMPRESSLEVEL: 10 CCACHE_MAXSIZE: 75M steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Dependencies run: | .github/workflows/dependencies/dependencies_nvcc.sh diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 18dbce32b5b..f8ffd4e5257 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -12,7 +12,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: seanmiddleditch/gha-setup-ninja@master #- name: Set Up Cache # uses: 
actions/cache@v3 @@ -56,7 +56,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI static runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: seanmiddleditch/gha-setup-ninja@master #- name: Set Up Cache # uses: actions/cache@v3 @@ -98,7 +98,7 @@ jobs: name: Clang C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: seanmiddleditch/gha-setup-ninja@master - name: Build & Install shell: cmd From 1647cff047c59cffb1acea56c9fbe5be1d9d58e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 14:39:58 -0800 Subject: [PATCH 091/186] Bump github/codeql-action from 2 to 3 (#3708) --- .github/workflows/codeql.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 030d8f5cef7..d5feb7491d0 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -59,14 +59,14 @@ jobs: -DCMAKE_CXX_COMPILER="/usr/local/bin/g++" - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} queries: +security-and-quality config-file: ./.github/workflows/codeql/codeql-config.yml - name: Build (py) - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 if: ${{ matrix.language == 'python' }} - name: Build (C++) @@ -89,7 +89,7 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{ matrix.language }}" From 9996b68e9ce9e1d995cbc894b591eb5217227cc0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 23:09:07 +0000 Subject: [PATCH 092/186] Bump actions/upload-artifact from 3 to 4 (#3709) --- .github/workflows/apps.yml | 2 +- .github/workflows/bittree.yml | 2 +- .github/workflows/clang.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/cuda.yml | 2 +- .github/workflows/gcc.yml | 2 +- .github/workflows/hip.yml | 2 +- .github/workflows/hypre.yml | 2 +- .github/workflows/intel.yml | 2 +- .github/workflows/macos.yml | 2 +- .github/workflows/petsc.yml | 2 +- .github/workflows/post-pr.yml | 2 +- .github/workflows/smoke.yml | 2 +- .github/workflows/sundials.yml | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/apps.yml b/.github/workflows/apps.yml index 15805162571..a8150fa871e 100644 --- a/.github/workflows/apps.yml +++ b/.github/workflows/apps.yml @@ -111,7 +111,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/bittree.yml b/.github/workflows/bittree.yml index e487c90076b..75b154376ba 100644 --- a/.github/workflows/bittree.yml +++ b/.github/workflows/bittree.yml @@ -120,7 +120,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index d6ba9c72e32..0f3fbc35c99 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -175,7 +175,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo 
$PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index d5feb7491d0..d2e1e98b881 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -102,7 +102,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index b0821a2ed03..15812b6e287 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -215,7 +215,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 83d840b95c7..e4d40029cfb 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -673,7 +673,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index 07b815a00eb..189d247211c 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -203,7 +203,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/hypre.yml b/.github/workflows/hypre.yml index e9b86fa9167..3724f448804 100644 --- a/.github/workflows/hypre.yml +++ b/.github/workflows/hypre.yml @@ -164,7 +164,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index ee1b7503188..e53d163df62 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -248,7 +248,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 5917c7bd074..007ac29cb3a 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -95,7 +95,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/petsc.yml b/.github/workflows/petsc.yml index 0d42d7984c6..eebd9910212 100644 --- a/.github/workflows/petsc.yml +++ b/.github/workflows/petsc.yml @@ -66,7 +66,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/post-pr.yml b/.github/workflows/post-pr.yml index f5b914033b7..2768ef376cc 100644 --- a/.github/workflows/post-pr.yml +++ b/.github/workflows/post-pr.yml @@ -13,7 +13,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo 
$PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 2d9b5fbe457..85eea4dec6d 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -73,7 +73,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt diff --git a/.github/workflows/sundials.yml b/.github/workflows/sundials.yml index 43c04da1cec..a2bb21b0780 100644 --- a/.github/workflows/sundials.yml +++ b/.github/workflows/sundials.yml @@ -143,7 +143,7 @@ jobs: PR_NUMBER: ${{ github.event.number }} run: | echo $PR_NUMBER > pr_number.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: pr_number path: pr_number.txt From 4be40506dbb26d18f1ac46b5c0add4d94aa1f51e Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Fri, 19 Jan 2024 16:29:00 -0800 Subject: [PATCH 093/186] Clean up interpolation routines in AMReX_TracerParticle_mod_K.H and AMReX_Particle_mod_K.H (#3679) The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Revathi Jambunathan <41089244+RevathiJambunathan@users.noreply.github.com> --- Src/Particle/AMReX_ParticleContainer.H | 5 - Src/Particle/AMReX_ParticleContainerI.H | 50 ---- Src/Particle/AMReX_Particle_mod_K.H | 74 ----- Src/Particle/AMReX_TracerParticle_mod_K.H | 265 +++++++----------- Tests/Particles/AssignDensity/main.cpp | 4 - .../AssignMultiLevelDensity/main.cpp | 4 - 6 files changed, 98 insertions(+), 304 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index df71f2eeec1..966ca0a8441 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -1180,11 +1180,6 @@ public: Vector >& mf_to_be_filled, int lev_min, int ncomp, int finest_level, int ngrow=2) const; - void Interpolate (Vector >& mesh_data, - int lev_min, int lev_max); - - void InterpolateSingleLevel (MultiFab& mesh_data, int lev); - void AssignCellDensitySingleLevel (int rho_index, MultiFab& mf, int level, int ncomp=1, int particle_lvl_offset = 0) const; diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 9dd1d39b953..fe83ce5ba1c 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -2481,56 +2481,6 @@ AssignCellDensitySingleLevel (int rho_index, } } -template class Allocator, class CellAssignor> -void -ParticleContainer_impl::Interpolate (Vector >& mesh_data, - int lev_min, int lev_max) -{ - BL_PROFILE("ParticleContainer::Interpolate()"); - for (int lev = lev_min; lev <= lev_max; ++lev) { - InterpolateSingleLevel(*mesh_data[lev], lev); - } -} - -template class Allocator, class CellAssignor> -void -ParticleContainer_impl:: -InterpolateSingleLevel (MultiFab& mesh_data, int lev) -{ - BL_PROFILE("ParticleContainer::InterpolateSingleLevel()"); - - if (mesh_data.nGrow() < 1) { - amrex::Error("Must have at least one ghost cell when in InterpolateSingleLevel"); - } - - const Geometry& gm = Geom(lev); - const auto plo = 
gm.ProbLoArray(); - const auto dxi = gm.InvCellSizeArray(); - - using ParIter = ParIter_impl; - -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (ParIter pti(*this, lev); pti.isValid(); ++pti) - { - auto& particles = pti.GetArrayOfStructs(); - auto ptd = pti.GetParticleTile().getParticleTileData(); - FArrayBox& fab = mesh_data[pti]; - const auto fabarr = fab.array(); - const Long np = particles.numParticles(); - - int nComp = fab.nComp(); - AMREX_FOR_1D( np, i, - { - auto p = make_particle{}(ptd,i); - amrex_interpolate_cic(p, nComp, fabarr, plo, dxi); - }); - } -} - template class Allocator, class CellAssignor> void diff --git a/Src/Particle/AMReX_Particle_mod_K.H b/Src/Particle/AMReX_Particle_mod_K.H index 2d1f91b4275..ac9254bd62c 100644 --- a/Src/Particle/AMReX_Particle_mod_K.H +++ b/Src/Particle/AMReX_Particle_mod_K.H @@ -233,80 +233,6 @@ void amrex_deposit_particle_dx_cic (P const& p, int nc, amrex::Array4 -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void amrex_interpolate_cic (P const& p, int nc, amrex::Array4 const& acc, - amrex::GpuArray const& plo, - amrex::GpuArray const& dxi) -{ -#if (AMREX_SPACEDIM == 1) - amrex::Real lx = (p.pos(0) - plo[0]) * dxi[0] + Real(0.5); - - int i = static_cast(amrex::Math::floor(lx)); - - amrex::Real xint = lx - static_cast(i); - - amrex::Real sx[] = {Real(1.0)-xint, xint}; - - for (int comp=0; comp < nc; ++comp) { - for (int ii = 0; ii <= 1; ++ii) { - amrex::Real acceleration = sx[ii]*acc(i+ii-1,0,0,comp); - amrex::ignore_unused(acceleration); - } - } -#elif (AMREX_SPACEDIM == 2) - amrex::Real lx = (p.pos(0) - plo[0]) * dxi[0] + Real(0.5); - amrex::Real ly = (p.pos(1) - plo[1]) * dxi[1] + Real(0.5); - - int i = static_cast(amrex::Math::floor(lx)); - int j = static_cast(amrex::Math::floor(ly)); - - amrex::Real xint = lx - static_cast(i); - amrex::Real yint = ly - static_cast(j); - - amrex::Real sx[] = {Real(1.0)-xint, xint}; - amrex::Real sy[] = {Real(1.0)-yint, yint}; - - for (int comp=0; comp < nc; ++comp) { - for (int jj = 0; jj <= 1; ++jj) { - for (int ii = 0; ii <= 1; ++ii) { - amrex::Real acceleration = sx[ii]*sy[jj]*acc(i+ii-1,j+jj-1,0,comp); - amrex::ignore_unused(acceleration); - } - } - } -#elif (AMREX_SPACEDIM == 3) - amrex::Real lx = (p.pos(0) - plo[0]) * dxi[0] + Real(0.5); - amrex::Real ly = (p.pos(1) - plo[1]) * dxi[1] + Real(0.5); - amrex::Real lz = (p.pos(2) - plo[2]) * dxi[2] + Real(0.5); - - int i = static_cast(amrex::Math::floor(lx)); - int j = static_cast(amrex::Math::floor(ly)); - int k = static_cast(amrex::Math::floor(lz)); - - amrex::Real xint = lx - static_cast(i); - amrex::Real yint = ly - static_cast(j); - amrex::Real zint = lz - static_cast(k); - - amrex::Real sx[] = {Real(1.0)-xint, xint}; - amrex::Real sy[] = {Real(1.0)-yint, yint}; - amrex::Real sz[] = {Real(1.0)-zint, zint}; - - for (int comp=0; comp < nc; ++comp) { - for (int kk = 0; kk <= 1; ++kk) { - for (int jj = 0; jj <= 1; ++jj) { - for (int ii = 0; ii <= 1; ++ii) { - amrex::Real acceleration = sx[ii]*sy[jj]*sz[kk]*acc(i+ii-1,j+jj-1,k+kk-1,comp); - amrex::ignore_unused(acceleration); - } - } - } - } -#else - amrex::Abort("Not implemented."); -#endif -} - } #endif diff --git a/Src/Particle/AMReX_TracerParticle_mod_K.H b/Src/Particle/AMReX_TracerParticle_mod_K.H index 433ad864e58..9df0864d33c 100644 --- a/Src/Particle/AMReX_TracerParticle_mod_K.H +++ b/Src/Particle/AMReX_TracerParticle_mod_K.H @@ -1,7 +1,7 @@ #ifndef AMREX_TRACERPARTICLE_MOD_K_H #define AMREX_TRACERPARTICLE_MOD_K_H -#include +#include #include #include #include 
@@ -9,200 +9,131 @@ #include #include #include - #include -namespace amrex{ +namespace amrex { +/** + \brief Linearly interpolates the mesh data to the particle position from cell-centered data. +*/ template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void cic_interpolate (const P& p, amrex::GpuArray const& plo, amrex::GpuArray const& dxi, - const amrex::Array4 & uccarr, - amrex::ParticleReal * val, int M) + const amrex::Array4& data_arr, + amrex::ParticleReal * val, int M = AMREX_SPACEDIM) { - AMREX_ASSERT(val != nullptr); - -#if (AMREX_SPACEDIM == 1) - - amrex::Real lx = (Real(p.pos(0)) - plo[0]) * dxi[0] - Real(0.5); //len - - int const i = static_cast(amrex::Math::floor(lx)); //cell - - amrex::Real xint = lx - static_cast(i); //frac - - amrex::Real sx[] = {Real(1.0) - xint, xint}; - - for (int d=0; d < M; ++d) - { - val[d] = ParticleReal(0.0); - for (int ii = 0; ii<=1; ++ii) - { - val[d] += static_cast(sx[ii]*uccarr(i+ii,0,0,d)); - } - } - - -#elif (AMREX_SPACEDIM == 2) - - amrex::Real lx = (Real(p.pos(0)) - plo[0]) * dxi[0] - Real(0.5); - amrex::Real ly = (Real(p.pos(1)) - plo[1]) * dxi[1] - Real(0.5); - - int const i = static_cast(amrex::Math::floor(lx)); - int const j = static_cast(amrex::Math::floor(ly)); - - amrex::Real xint = lx - static_cast(i); - amrex::Real yint = ly - static_cast(j); - - amrex::Real sx[] = {Real(1.0) - xint, xint}; - amrex::Real sy[] = {Real(1.0) - yint, yint}; - - for (int d=0; d < M; ++d) - { - val[d] = ParticleReal(0.0); - for (int jj = 0; jj <= 1; ++jj) - { - for (int ii = 0; ii <= 1; ++ii) - { - val[d] += static_cast(sx[ii]*sy[jj]*uccarr(i+ii,j+jj,0,d)); - } - } - } - -#elif (AMREX_SPACEDIM == 3) - - amrex::Real lx = (Real(p.pos(0)) - plo[0]) * dxi[0] - Real(0.5); - amrex::Real ly = (Real(p.pos(1)) - plo[1]) * dxi[1] - Real(0.5); - amrex::Real lz = (Real(p.pos(2)) - plo[2]) * dxi[2] - Real(0.5); - - int const i = static_cast(amrex::Math::floor(lx)); - int const j = static_cast(amrex::Math::floor(ly)); - int const k = static_cast(amrex::Math::floor(lz)); - - amrex::Real const xint = lx - static_cast(i); - amrex::Real const yint = ly - static_cast(j); - amrex::Real const zint = lz - static_cast(k); - - amrex::Real sx[] = {Real(1.0) - xint, xint}; - amrex::Real sy[] = {Real(1.0) - yint, yint}; - amrex::Real sz[] = {Real(1.0) - zint, zint}; - - for (int d=0; d < M; ++d) - { - val[d] = ParticleReal(0.0); - for (int kk = 0; kk<=1; ++kk) - { - for (int jj = 0; jj <= 1; ++jj) - { - for (int ii = 0; ii <= 1; ++ii) - { - val[d] += static_cast(sx[ii]*sy[jj]*sz[kk]*uccarr(i+ii,j+jj,k+kk,d)); - } - } - } - } -#endif + int start_comp = 0; + int ncomp_per_array = M; + int num_arrays = 1; + IntVect is_nodal = amrex::IntVect::TheZeroVector(); + linear_interpolate_to_particle (p, plo, dxi, &data_arr, val, &is_nodal, start_comp, ncomp_per_array, num_arrays); } +/** + \brief Linearly interpolates the mesh data to the particle position from node-centered data. 
+*/ template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void cic_interpolate (const P& p, - amrex::GpuArray const& plo, - amrex::GpuArray const& dxi, - const amrex::Array4 & uccarr, - amrex::ParticleReal * val) +void cic_interpolate_nd (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const amrex::Array4& data_arr, + amrex::ParticleReal * val, int M = AMREX_SPACEDIM) { - cic_interpolate(p, plo, dxi, uccarr, val, AMREX_SPACEDIM); + int start_comp = 0; + int ncomp_per_array = M; + int num_arrays = 1; + IntVect is_nodal = amrex::IntVect::TheUnitVector(); + linear_interpolate_to_particle (p, plo, dxi, &data_arr, val, &is_nodal, start_comp, ncomp_per_array, num_arrays); } +/** + \brief Linearly interpolates the mesh data to the particle position from face-centered data. + The nth component of the data_arr array is nodal in the nth direction, and cell-centered in the others. +*/ template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mac_interpolate (const P& p, amrex::GpuArray const& plo, amrex::GpuArray const& dxi, - amrex::GpuArray,AMREX_SPACEDIM> const& p_uccarr, + amrex::GpuArray,AMREX_SPACEDIM> const& data_arr, amrex::ParticleReal * val) { + int start_comp = 0; + int ncomp_per_array = 1; + int num_arrays = AMREX_SPACEDIM; + IntVect is_nodal[AMREX_SPACEDIM]; + for (int d=0; d < AMREX_SPACEDIM; ++d) { + is_nodal[d] = amrex::IntVect::TheZeroVector(); + is_nodal[d][d] = 1; + } + linear_interpolate_to_particle (p, plo, dxi, data_arr.data(), val, &is_nodal[0], start_comp, ncomp_per_array, num_arrays); +} -#if (AMREX_SPACEDIM == 1) - for (int d=0; d < AMREX_SPACEDIM; ++d) - { - amrex::Real lx = (Real(p.pos(0))-plo[0])*dxi[0] - static_cast(d != 0)*Real(0.5); - int const i = static_cast(amrex::Math::floor(lx)); - amrex::Real const xint = lx - static_cast(i); +/** + \brief Linearly interpolates the mesh data to the particle position from mesh data. + This general form can handle an arbitrary number of Array4s, each with different staggerings. 
+*/ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void linear_interpolate_to_particle (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const Array4* data_arr, + amrex::ParticleReal * val, + const IntVect* is_nodal, + int start_comp, int ncomp, int num_arrays) +{ + AMREX_ASSERT(val != nullptr); - amrex::Real sx[] = {Real(1.0) - xint, xint}; + int ctr = 0; - val[d] = ParticleReal(0.0); - for (int ii = 0; ii <= 1; ++ii) - { - val[d] += static_cast((p_uccarr[d])(i+ii, 0, 0, 0)*sx[ii]); - } - } + for (int d = 0; d < num_arrays; d++) + { + AMREX_D_TERM(amrex::Real lx = (Real(p.pos(0))-plo[0])*dxi[0] - static_cast(!is_nodal[d][0])*Real(0.5);, + amrex::Real ly = (Real(p.pos(1))-plo[1])*dxi[1] - static_cast(!is_nodal[d][1])*Real(0.5);, + amrex::Real lz = (Real(p.pos(2))-plo[2])*dxi[2] - static_cast(!is_nodal[d][2])*Real(0.5)); + + // (i0,j0,k0) is the lower corner of the box needed for interpolation + // i0 = (i-1) if particle is lower than center of cell i + // i0 = (i ) if particle is higher than center of cell i + AMREX_D_TERM(int const i0 = static_cast(amrex::Math::floor(lx));, + int const j0 = static_cast(amrex::Math::floor(ly));, + int const k0 = static_cast(amrex::Math::floor(lz))); + + AMREX_D_TERM(amrex::Real const xint = lx - static_cast(i0);, + amrex::Real const yint = ly - static_cast(j0);, + amrex::Real const zint = lz - static_cast(k0)); + + amrex::Real sx[] = {amrex::Real(1.0) - xint, xint}; +#if (AMREX_SPACEDIM > 1) + amrex::Real sy[] = {amrex::Real(1.0) - yint, yint}; +#endif +#if (AMREX_SPACEDIM > 2) + amrex::Real sz[] = {amrex::Real(1.0) - zint, zint}; +#endif -#elif (AMREX_SPACEDIM == 2) - - for (int d=0; d < AMREX_SPACEDIM; ++d) - { - amrex::Real lx = (Real(p.pos(0))-plo[0])*dxi[0] - static_cast(d != 0)*Real(0.5); - amrex::Real ly = (Real(p.pos(1))-plo[1])*dxi[1] - static_cast(d != 1)*Real(0.5); - - int const i = static_cast(amrex::Math::floor(lx)); - int const j = static_cast(amrex::Math::floor(ly)); - - amrex::Real const xint = lx - static_cast(i); - amrex::Real const yint = ly - static_cast(j); - - amrex::Real sx[] = {Real(1.0) - xint, xint}; - amrex::Real sy[] = {Real(1.0) - yint, yint}; - - val[d] = ParticleReal(0.0); - for (int jj = 0; jj <= 1; ++jj) - { - for (int ii = 0; ii <= 1; ++ii) - { - val[d] += static_cast((p_uccarr[d])(i+ii, j+jj, 0, 0)*sx[ii]*sy[jj]); - } - } - } - - -#elif (AMREX_SPACEDIM == 3) - - for (int d=0; d < AMREX_SPACEDIM; ++d) - { - amrex::Real lx = (Real(p.pos(0))-plo[0])*dxi[0] - static_cast(d != 0)*Real(0.5); - amrex::Real ly = (Real(p.pos(1))-plo[1])*dxi[1] - static_cast(d != 1)*Real(0.5); - amrex::Real lz = (Real(p.pos(2))-plo[2])*dxi[2] - static_cast(d != 2)*Real(0.5); - - int const i = static_cast(amrex::Math::floor(lx)); - int const j = static_cast(amrex::Math::floor(ly)); - int const k = static_cast(amrex::Math::floor(lz)); - - amrex::Real const xint = lx - static_cast(i); - amrex::Real const yint = ly - static_cast(j); - amrex::Real const zint = lz - static_cast(k); - - amrex::Real sx[] = {Real(1.0) - xint, xint}; - amrex::Real sy[] = {Real(1.0) - yint, yint}; - amrex::Real sz[] = {Real(1.0) - zint, zint}; - - val[d] = ParticleReal(0.0); - for (int kk = 0; kk <=1; ++kk) - { - for (int jj = 0; jj <= 1; ++jj) - { - for (int ii = 0; ii <= 1; ++ii) - { - val[d] += static_cast((p_uccarr[d])(i+ii, j+jj, k+kk ,0)*sx[ii]*sy[jj]*sz[kk]); - } - } - } - } + for (int comp = start_comp; comp < ncomp; ++comp) { + val[ctr] = ParticleReal(0.0); +#if (AMREX_SPACEDIM > 2) + for (int kk = 0; kk <=1; ++kk) { #endif -} -} + +#if 
(AMREX_SPACEDIM > 1) + for (int jj = 0; jj <= 1; ++jj) { #endif + for (int ii = 0; ii <= 1; ++ii) { + val[ctr] += static_cast((data_arr[d])(IntVect(AMREX_D_DECL(i0+ii, j0+jj, k0+kk)), comp) * + AMREX_D_TERM(sx[ii],*sy[jj],*sz[kk])); + AMREX_D_TERM(},},}); + ctr++; + } // ncomp + } // d +} + +} // namespace amrex +#endif // include guard diff --git a/Tests/Particles/AssignDensity/main.cpp b/Tests/Particles/AssignDensity/main.cpp index fb4a0fb970b..d8caa32870e 100644 --- a/Tests/Particles/AssignDensity/main.cpp +++ b/Tests/Particles/AssignDensity/main.cpp @@ -68,10 +68,6 @@ void test_assign_density(TestParams& parms) myPC.InitRandom(num_particles, iseed, pdata, serialize); myPC.AssignCellDensitySingleLevel(0, partMF, 0, 1 + AMREX_SPACEDIM, 0); - // myPC.AssignDensitySingleLevel(0, partMF, 0, 4, 0); - - // myPC.InterpolateSingleLevel(acceleration, 0); - MultiFab::Copy(density, partMF, 0, 0, 1, 0); WriteSingleLevelPlotfile("plt00000", partMF, diff --git a/Tests/Particles/AssignMultiLevelDensity/main.cpp b/Tests/Particles/AssignMultiLevelDensity/main.cpp index 0ffa37dd695..21ae50725d2 100644 --- a/Tests/Particles/AssignMultiLevelDensity/main.cpp +++ b/Tests/Particles/AssignMultiLevelDensity/main.cpp @@ -98,14 +98,10 @@ void test_assign_density(TestParams& parms) double mass = 10.0; MyParticleContainer::ParticleInitData pdata = {{mass},{},{},{}}; - // myPC.InitRandom(num_particles, iseed, pdata, serialize, fine_box); myPC.InitRandom(num_particles, iseed, pdata, serialize); - //myPC.AssignDensity(0, true, partMF, 0, 1, 1); myPC.AssignDensity(0, partMF, 0, 1, nlevs-1); - myPC.Interpolate(acceleration, 0, nlevs-1); - for (int lev = 0; lev < nlevs; ++lev) { MultiFab::Copy(*density[lev], *partMF[lev], 0, 0, 1, 0); } From 6f1c74f7a9295f7b217a2ef8a631cbe136bc689e Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 19 Jan 2024 17:50:23 -0800 Subject: [PATCH 094/186] Revert "add a dependabot.yml to checkout github actions versions (#3704)" (#3710) This reverts commit 53a581cc802734846f1c102e593bddb828922cf0. It creates too much noise for forks, and it cannot be disabled for forks. Another issue. It appears that it just called docs.yml and push a new branch "main" to AMReX-Codes/amrex, not AMReX-Codes/AMReX-Codes.github.io. --- .github/dependabot.yml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index b4f368c6680..00000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,8 +0,0 @@ -# Dependabot configuration -# ref: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" From 022f97ea9ebbc28fca25f38f3eda26ce68b6230b Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 19 Jan 2024 21:01:26 -0800 Subject: [PATCH 095/186] Update Build and Deploy action (#3712) Make some changes because a new version of JamesIves/github-pages-deploy-action is used now. 
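For reference, the deploy step after this update reads roughly as follows. This is a sketch reconstructed from the hunk below; the step name and the surrounding job/steps context are not part of the change and are assumed here:

```yaml
# Assumed context: this step sits under the docs build job's `steps:` list in docs.yml.
- name: Deploy   # hypothetical step name, not part of the change
  if: github.event_name == 'push' && github.repository == 'AMReX-Codes/amrex' && github.ref == 'refs/heads/development'
  uses: JamesIves/github-pages-deploy-action@v4.5.0
  with:
    ssh-key: ${{ secrets.AMREX_BUILD_DOCS }}
    repository-name: AMReX-Codes/AMReX-Codes.github.io
    branch: main          # The branch the action should deploy to.
    folder: build         # The folder the action should deploy.
    target-folder: amrex  # The folder the action should deploy to.
    clean: false          # Do not remove existing files from the deploy target.
```

The v4 action takes lower-case, hyphenated inputs and an `ssh-key` deploy key in place of the old upper-case token-based settings, which is why the `with:` block is rewritten wholesale in the diff that follows.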
--- .github/workflows/docs.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e4452b9f6f9..49d564b36b3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -29,10 +29,9 @@ jobs: if: github.event_name == 'push' && github.repository == 'AMReX-Codes/amrex' && github.ref == 'refs/heads/development' uses: JamesIves/github-pages-deploy-action@v4.5.0 with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ACCESS_TOKEN: ${{ secrets.DEPLOY_DOCS }} - REPOSITORY_NAME: AMReX-Codes/AMReX-Codes.github.io - BRANCH: main # The branch the action should deploy to. - FOLDER: build # The folder the action should deploy. - TARGET_FOLDER: amrex # The folder the action should deploy to. - CLEAN: false # Do not remove existing files from the deploy target. + ssh-key: ${{ secrets.AMREX_BUILD_DOCS }} + repository-name: AMReX-Codes/AMReX-Codes.github.io + branch: main # The branch the action should deploy to. + folder: build # The folder the action should deploy. + target-folder: amrex # The folder the action should deploy to. + clean: false # Do not remove existing files from the deploy target. From 0c59bad946fdf5ee86f0a48204324c5efaad7284 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 22 Jan 2024 15:22:54 -0800 Subject: [PATCH 096/186] lockAdd: case of 2D plane in 3D (#3700) ## Summary In HiPACE++, atomicAdd is used on 2d x & y planes even though AMREX_SPACEDIM is 3. In that case, we would have all threads competing for a single lock in the previous implementation of lockAdd. This PR fixes this use case by having locks associated with the y-direction when the number of cells in the z-direction is 1. ## Additional background https://github.com/Hi-PACE/hipace/pull/1059 ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_BaseFab.H | 60 +++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index c4820bbe923..b983367c21d 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -3330,15 +3330,25 @@ BaseFab::lockAdd (const BaseFab& src, const Box& srcbox, const Box& destbo Array4 const& d = this->array(); Array4 const& s = src.const_array(); - auto const& dlo = destbox.smallEnd(); -#if (AMREX_SPACEDIM == 3) - auto const& dhi = destbox.bigEnd(); -#endif - auto const& slo = srcbox.smallEnd(); - auto const offset = slo - dlo; - auto const lenx = srcbox.length(0); + auto const& dlo = amrex::lbound(destbox); + auto const& dhi = amrex::ubound(destbox); + auto const& len = amrex::length(destbox); + auto const& slo = amrex::lbound(srcbox); + Dim3 const offset{slo.x-dlo.x, slo.y-dlo.y, slo.z-dlo.z}; + + int planedim; + int nplanes; + int plo; + if (len.z == 1) { + planedim = 1; + nplanes = len.y; + plo = dlo.y; + } else { + planedim = 2; + nplanes = len.z; + plo = dlo.z; + } - auto const nplanes = srcbox.length(AMREX_SPACEDIM-1); auto* mask = (bool*) amrex_mempool_alloc(sizeof(bool)*nplanes); for (int ip = 0; ip < nplanes; ++ip) { mask[ip] = false; @@ -3348,27 +3358,31 @@ BaseFab::lockAdd (const BaseFab& src, const Box& srcbox, const Box& destbo int planes_left = nplanes; while (planes_left > 0) 
{ AMREX_ASSERT(mm < nplanes); - auto const m = mm + dlo[AMREX_SPACEDIM-1]; + auto const m = mm + plo; int ilock = m % OpenMP::nlocks; if (ilock < 0) { ilock += OpenMP::nlocks; } auto* lock = &(OpenMP::omp_locks[ilock]); if (omp_test_lock(lock)) { - for (int n = 0; n < numcomp; ++n) - { -#if (AMREX_SPACEDIM == 3) - for (int j = dlo[1]; j <= dhi[1]; ++j) - { - IntVect div(dlo[0], j, m); -#elif (AMREX_SPACEDIM == 2) - { - IntVect div(dlo[0], m); -#endif - auto * pdst = d.ptr(div ,n+destcomp); - auto const* psrc = s.ptr(div+offset,n+srccomp); + auto lo = dlo; + auto hi = dhi; + if (planedim == 1) { + lo.y = m; + hi.y = m; + } else { + lo.z = m; + hi.z = m; + } + + for (int n = 0; n < numcomp; ++n) { + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + auto * pdst = d.ptr(dlo.x,j ,k ,n+destcomp); + auto const* psrc = s.ptr(slo.x,j+offset.y,k+offset.z,n+ srccomp); #pragma omp simd - for (int ii = 0; ii < lenx; ++ii) { - pdst[ii] += psrc[ii]; + for (int ii = 0; ii < len.x; ++ii) { + pdst[ii] += psrc[ii]; + } } } } From 2fc8a34ced0c0acae04298c69ec23803ec05e9cd Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Tue, 23 Jan 2024 07:29:23 -0800 Subject: [PATCH 097/186] Fix warnings in DenseBins::build with serial bin policy (#3716) --- Src/Particle/AMReX_DenseBins.H | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_DenseBins.H b/Src/Particle/AMReX_DenseBins.H index 5ff9ecdc23f..b13d6321da2 100644 --- a/Src/Particle/AMReX_DenseBins.H +++ b/Src/Particle/AMReX_DenseBins.H @@ -481,7 +481,7 @@ public: m_offsets.resize(0); m_offsets.resize(nbins+1); - for (int i = 0; i < nitems; ++i) { + for (N i = 0; i < nitems; ++i) { m_bins[i] = call_f(f,v,i); ++m_counts[m_bins[i]]; } @@ -490,7 +490,7 @@ public: Gpu::copy(Gpu::deviceToDevice, m_offsets.begin(), m_offsets.end(), m_counts.begin()); - for (int i = 0; i < nitems; ++i) { + for (N i = 0; i < nitems; ++i) { index_type index = m_counts[m_bins[i]]++; m_perm[index] = i; } From 73b215557c0e842c3e829b683939bbb7a7e12373 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Tue, 23 Jan 2024 10:35:12 -0800 Subject: [PATCH 098/186] relax constraint that real_comp_names.size() == pc.NumRealComps() + NStructReal for pure SoA plotfiles (#3717) The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_ParticleIO.H | 74 ++++++++++++++----- Src/Particle/AMReX_WriteBinaryParticleData.H | 33 ++++++--- Tests/Particles/CheckpointRestartSOA/main.cpp | 2 +- 3 files changed, 80 insertions(+), 29 deletions(-) diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index a10f9973a01..c243a61d81d 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -51,7 +51,9 @@ ParticleContainer_impl write_real_comp; Vector tmp_real_comp_names; - for (int i = 0; i < NStructReal + NumRealComps(); ++i ) + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + + for (int i = 0; i < nrc; ++i ) { write_real_comp.push_back(1); if (real_comp_names.empty()) @@ -96,7 +98,9 @@ ParticleContainer_impl write_real_comp; Vector real_comp_names; - for (int i = 0; i < NStructReal + NumRealComps(); ++i ) + int nrc = ParticleType::is_soa_particle ? 
NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + + for (int i = 0; i < nrc; ++i ) { write_real_comp.push_back(1); std::stringstream ss; @@ -127,11 +131,16 @@ ParticleContainer_impl& real_comp_names, const Vector& int_comp_names) const { - AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); + if constexpr(ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal); + } AMREX_ASSERT( int_comp_names.size() == NStructInt + NumIntComps() ); Vector write_real_comp; - for (int i = 0; i < NStructReal + NumRealComps(); ++i) { + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + for (int i = 0; i < nrc; ++i) { write_real_comp.push_back(1); } @@ -153,10 +162,15 @@ ParticleContainer_impl& real_comp_names) const { - AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); + if constexpr(ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal); + } Vector write_real_comp; - for (int i = 0; i < NStructReal + NumRealComps(); ++i) { + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + for (int i = 0; i < nrc; ++i) { write_real_comp.push_back(1); } @@ -188,11 +202,17 @@ ParticleContainer_impl& write_real_comp, const Vector& write_int_comp) const { - AMREX_ASSERT(write_real_comp.size() == NStructReal + NumRealComps()); + + if constexpr(ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(write_real_comp.size() == NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(write_real_comp.size() == NumRealComps() + NStructReal); + } AMREX_ASSERT(write_int_comp.size() == NStructInt + NArrayInt ); Vector real_comp_names; - for (int i = 0; i < NStructReal + NumRealComps(); ++i ) + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + for (int i = 0; i < nrc; ++i ) { std::stringstream ss; ss << "real_comp" << i; @@ -239,7 +259,9 @@ ParticleContainer_impl write_real_comp; Vector real_comp_names; - for (int i = 0; i < NStructReal + NumRealComps(); ++i ) + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + + for (int i = 0; i < nrc; ++i ) { write_real_comp.push_back(1); std::stringstream ss; @@ -271,11 +293,16 @@ ParticleContainer_impl& real_comp_names, const Vector& int_comp_names, F&& f) const { - AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); + if constexpr(ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal); + } AMREX_ASSERT( int_comp_names.size() == NStructInt + NArrayInt ); Vector write_real_comp; - for (int i = 0; i < NStructReal + NumRealComps(); ++i) { + int nrc = ParticleType::is_soa_particle ? 
NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + for (int i = 0; i < nrc; ++i) { write_real_comp.push_back(1); } @@ -298,10 +325,15 @@ ParticleContainer_impl& real_comp_names, F&& f) const { - AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); + if constexpr(ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal); + } Vector write_real_comp; - for (int i = 0; i < NStructReal + NumRealComps(); ++i) { + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + for (int i = 0; i < nrc; ++i) { write_real_comp.push_back(1); } @@ -334,11 +366,16 @@ ParticleContainer_impl& write_real_comp, const Vector& write_int_comp, F&& f) const { - AMREX_ASSERT(write_real_comp.size() == NStructReal + NumRealComps()); + if constexpr(ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(write_real_comp.size() == NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(write_real_comp.size() == NumRealComps() + NStructReal); + } AMREX_ASSERT(write_int_comp.size() == NStructInt + NumIntComps() ); Vector real_comp_names; - for (int i = 0; i < NStructReal + NumRealComps(); ++i ) + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + for (int i = 0; i < nrc; ++i ) { std::stringstream ss; ss << "real_comp" << i; @@ -680,8 +717,9 @@ ParticleContainer_impl> nr; - if (nr != NStructReal + NumRealComps()) { - amrex::Abort("ParticleContainer::Restart(): nr != NStructReal + NumRealComps()"); + int nrc = ParticleType::is_soa_particle ? NStructReal + NumRealComps() - AMREX_SPACEDIM : NStructReal + NumRealComps(); + if (nr != nrc) { + amrex::Abort("ParticleContainer::Restart(): nr not the expected value"); } std::string comp_name; @@ -929,7 +967,7 @@ ParticleContainer_impl rstuff(std::size_t(cnt)*rChunkSize); ReadParticleRealData(rstuff.dataPtr(), rstuff.size(), ifs); diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 105a254a348..449c26c6ef2 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -289,7 +289,7 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l idata.resize(np*iChunkSize); int num_output_real = 0; - for (int i = 0; i < pc.NumRealComps() + PC::NStructReal; ++i) { + for (int i = 0; i < (int) write_real_comp.size(); ++i) { if (write_real_comp[i]) { ++num_output_real; } } @@ -361,7 +361,9 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l // extra SoA Real components const int real_start_offset = PC::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; // pure SoA: skip positions for (int j = real_start_offset; j < pc.NumRealComps(); j++) { - if (write_real_comp[PC::NStructReal+j]) { + const int write_comp_offset = PC::ParticleType::is_soa_particle ? 
AMREX_SPACEDIM : 0; // pure SoA: skip positions + const int write_comp_index = PC::NStructReal+j-write_comp_offset; + if (write_real_comp[write_comp_index]) { *rptr = (ParticleReal) soa.GetRealData(j)[pindex]; ++rptr; } @@ -393,7 +395,11 @@ void WriteBinaryParticleDataSync (PC const& pc, const int NProcs = ParallelDescriptor::NProcs(); const int IOProcNumber = ParallelDescriptor::IOProcessorNumber(); - AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal); + if constexpr(PC::ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal); + } AMREX_ALWAYS_ASSERT( int_comp_names.size() == pc.NumIntComps() + NStructInt); std::string pdir = dir; @@ -476,7 +482,7 @@ void WriteBinaryParticleDataSync (PC const& pc, } int num_output_real = 0; - for (int i = 0; i < pc.NumRealComps() + NStructReal; ++i) { + for (int i = 0; i < (int) write_real_comp.size(); ++i) { if (write_real_comp[i]) { ++num_output_real; } } @@ -492,7 +498,7 @@ void WriteBinaryParticleDataSync (PC const& pc, HdrFile << num_output_real << '\n'; // Real component names - for (int i = 0; i < NStructReal + pc.NumRealComps(); ++i ) { + for (int i = 0; i < (int) real_comp_names.size(); ++i ) { if (write_real_comp[i]) { HdrFile << real_comp_names[i] << '\n'; } } @@ -683,7 +689,11 @@ void WriteBinaryParticleDataAsync (PC const& pc, const int NProcs = ParallelDescriptor::NProcs(); const int IOProcNumber = NProcs - 1; - AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal); + if constexpr(PC::ParticleType::is_soa_particle) { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal - AMREX_SPACEDIM); // pure SoA: skip positions + } else { + AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal); + } AMREX_ALWAYS_ASSERT( int_comp_names.size() == pc.NumIntComps() + NStructInt); Vector > np_per_grid_local(pc.finestLevel()+1); @@ -847,6 +857,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, int nrc = pc.NumRealComps(); int nic = pc.NumIntComps(); + int rnames_size = (int) real_comp_names.size(); auto RD = pc.ParticleRealDescriptor; @@ -881,7 +892,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, } int num_output_real = 0; - for (int i = 0; i < nrc + NStructReal; ++i) { + for (int i = 0; i < rnames_size; ++i) { if (write_real_comp[i]) { ++num_output_real; } } @@ -897,7 +908,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, HdrFile << num_output_real << '\n'; // Real component names - for (int i = 0; i < NStructReal + nrc; ++i ) { + for (int i = 0; i < rnames_size; ++i ) { if (write_real_comp[i]) { HdrFile << real_comp_names[i] << '\n'; } } @@ -1047,7 +1058,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, // Write the Real data in binary. int num_output_real = 0; - for (int i = 0; i < nrc + NStructReal; ++i) { + for (int i = 0; i < rnames_size; ++i) { if (write_real_comp[i]) { ++num_output_real; } } @@ -1093,7 +1104,9 @@ void WriteBinaryParticleDataAsync (PC const& pc, const int real_start_offset = PC::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; // pure SoA: positions for (int j = real_start_offset; j < nrc; j++) { - if (write_real_comp[NStructReal+j]) + const int write_comp_offset = PC::ParticleType::is_soa_particle ? 
AMREX_SPACEDIM : 0; // pure SoA: skip positions + const int write_comp_index = PC::NStructReal+j-write_comp_offset; + if (write_real_comp[write_comp_index]) { *rptr = (typename PC::ParticleType::RealType) soa.GetRealData(j)[pindex]; ++rptr; diff --git a/Tests/Particles/CheckpointRestartSOA/main.cpp b/Tests/Particles/CheckpointRestartSOA/main.cpp index e8629560196..0458239fdcd 100644 --- a/Tests/Particles/CheckpointRestartSOA/main.cpp +++ b/Tests/Particles/CheckpointRestartSOA/main.cpp @@ -120,7 +120,7 @@ void test () amrex::Print() << " done \n"; Vector particle_realnames; - for (int i = 0; i < NReal; ++i) { + for (int i = 0; i < NReal-AMREX_SPACEDIM; ++i) { particle_realnames.push_back("particle_real_component_" + std::to_string(i)); } From 3e2a3c2940f4db6c740fdff863ed9b5cee36790b Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Tue, 23 Jan 2024 12:55:41 -0800 Subject: [PATCH 099/186] Interpolation routines for tracers with mapped_z. (#3714) This code is from @asalmgren and @RevathiJambunathan . The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Revathi Jambunathan <41089244+RevathiJambunathan@users.noreply.github.com> Co-authored-by: Ann Almgren --- Src/Particle/AMReX_TracerParticle_mod_K.H | 262 ++++++++++++++++++++++ 1 file changed, 262 insertions(+) diff --git a/Src/Particle/AMReX_TracerParticle_mod_K.H b/Src/Particle/AMReX_TracerParticle_mod_K.H index 9df0864d33c..472057a408e 100644 --- a/Src/Particle/AMReX_TracerParticle_mod_K.H +++ b/Src/Particle/AMReX_TracerParticle_mod_K.H @@ -13,6 +13,12 @@ namespace amrex { +// +// ********************************************************************** +// Regular coordinates +// ********************************************************************** +// + /** \brief Linearly interpolates the mesh data to the particle position from cell-centered data. */ @@ -135,5 +141,261 @@ void linear_interpolate_to_particle (const P& p, } // d } +// +// ********************************************************************** +// Terrain-fitted coordinates +// ********************************************************************** +// + +/** + \brief Linearly interpolates the mesh data to the particle position from cell-centered data + on a terrain-fitted grid. +*/ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cic_interpolate_mapped_z (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const amrex::Array4& data_arr, + const amrex::Array4& height_arr, + amrex::ParticleReal * val, int M = AMREX_SPACEDIM) +{ + cic_interpolate_cc_mapped_z(p, plo, dxi, data_arr, height_arr, val, M); +} + +/** + \brief Linearly interpolates the mesh data to the particle position from cell-centered data + on a terrain-fitted grid. 
+*/ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cic_interpolate_cc_mapped_z (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const amrex::Array4& data_arr, + const amrex::Array4& height_arr, + amrex::ParticleReal * val, int M = AMREX_SPACEDIM) +{ + int icomp = 0; + int ncomp_per_array = M; + int num_arrays = 1; + IntVect is_nodal = amrex::IntVect::TheZeroVector(); + linear_interpolate_to_particle_z(p, plo, dxi, &data_arr, height_arr, + val, &is_nodal, icomp, ncomp_per_array, num_arrays); +} + +/** + \brief Linearly interpolates the mesh data to the particle position from node-centered data. + on a terrain-fitted grid. +*/ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cic_interpolate_nd_mapped_z (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const amrex::Array4& data_arr, + const amrex::Array4& height_arr, + amrex::ParticleReal * val, int M = AMREX_SPACEDIM) +{ + int icomp = 0; + int ncomp_per_array = M; + int num_arrays = 1; + IntVect is_nodal = amrex::IntVect::TheUnitVector(); + linear_interpolate_to_particle_z(p, plo, dxi, &data_arr, height_arr, + val, &is_nodal, icomp, ncomp_per_array, num_arrays); +} + +/** + \brief Linearly interpolates the mesh data to the particle position from face-centered data + on a terrain-fitted grid. + The nth component of the data_arr array is nodal in the nth direction, and cell-centered in the others. +*/ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mac_interpolate_mapped_z (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + amrex::GpuArray,AMREX_SPACEDIM> const& data_arr, + const amrex::Array4& height_arr, + amrex::ParticleReal * val) +{ + int icomp = 0; + int ncomp_per_array = 1; + int num_arrays = AMREX_SPACEDIM; + IntVect is_nodal[AMREX_SPACEDIM]; + for (int d=0; d < AMREX_SPACEDIM; ++d) { + is_nodal[d] = amrex::IntVect::TheZeroVector(); + is_nodal[d][d] = 1; + } + linear_interpolate_to_particle_z(p, plo, dxi, data_arr.data(), height_arr, + val, &is_nodal[0], icomp, ncomp_per_array, num_arrays); +} + +/** + \brief Linearly interpolates the mesh data to the particle position from mesh data. + This general form can handle an arbitrary number of Array4s, each with different staggerings + on a terrain-fitted grid. 
+*/ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void linear_interpolate_to_particle_z (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const Array4* data_arr, + const amrex::Array4& height_arr, + amrex::ParticleReal * val, + const IntVect* is_nodal, + int start_comp, int ncomp, int num_arrays) +{ +#if (AMREX_SPACEDIM == 1) + amrex::ignore_unused(p, plo, dxi, data_arr, height_arr, val, is_nodal, start_comp, ncomp, num_arrays); + amrex::Abort(" Terrain fitted grid interpolation is not supported in 1D\n"); +#else + AMREX_ASSERT(val != nullptr); + + int ctr = 0; + + for (int d = 0; d < num_arrays; d++) + { + AMREX_D_TERM(amrex::Real lx = (Real(p.pos(0))-plo[0])*dxi[0] - static_cast(!is_nodal[d][0])*Real(0.5);, + amrex::Real ly = (Real(p.pos(1))-plo[1])*dxi[1] - static_cast(!is_nodal[d][1])*Real(0.5);,); + + int const i0 = static_cast(amrex::Math::floor(lx)); + int k = 0; // temporary for 2D + +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(ly); + int const j = p.idata(0); + + amrex::Real hlo_xlo = amrex::Real(0.25) * ( height_arr(i0 , j , k) + + height_arr(i0 + (!(is_nodal[d][0])) , j , k) + + height_arr(i0 , j + (!is_nodal[d][1]) , k) + + height_arr(i0 + (!(is_nodal[d][0])) , j + (!is_nodal[d][1]) , k) ); + + amrex::Real hlo_xhi = 0.25 * ( height_arr(i0 + 1 , j , k ) + + height_arr(i0 + 1 + (!(is_nodal[d][0])) , j , k ) + + height_arr(i0 + 1 , j + (!is_nodal[d][1]), k ) + + height_arr(i0 + 1 + (!(is_nodal[d][0])) , j + (!is_nodal[d][1]), k ) ); + + + amrex::Real const xint = lx - static_cast(i0); + amrex::Real sx[] = { amrex::Real(1.) - xint, xint}; + amrex::Real height_at_px = sx[0] * hlo_xlo + sx[1] * hlo_xhi; + + int const j0 = (amrex::Real(p.pos(1)) >= height_at_px) ? j : j-1; + + int yctr = 0; + amrex::Real ht[4]; + for (int ii=0; ii < 2; ++ii) { + for (int jj=0; jj < 2; ++jj) { + ht[yctr] = amrex::Real(0.25) * ( height_arr(i0 + ii , j0 + jj , k ) + + height_arr(i0 + ii + (!(is_nodal[d][0])) , j0 + jj , k ) + + height_arr(i0 + ii , j0 + jj + (!is_nodal[d][1]), k ) + + height_arr(i0 + ii + (!(is_nodal[d][0])) , j0 + jj + (!is_nodal[d][1]), k ) ); + ++yctr; + } + } + amrex::Real hint_ilo = (p.pos(1) - ht[0]) / (ht[1] - ht[0]); + amrex::Real hint_ihi = (p.pos(1) - ht[2]) / (ht[3] - ht[2]); + + amrex::Real sy[] = { amrex::Real(1.) - hint_ilo, amrex::Real(1.) - hint_ihi, + hint_ilo, hint_ihi}; + +#elif (AMREX_SPACEDIM == 3) + + int const j0 = static_cast(amrex::Math::floor(ly)); + k = p.idata(0); + amrex::Real const xint = lx - static_cast(i0); + amrex::Real const yint = ly - static_cast(j0); + amrex::Real sx[] = { amrex::Real(1.) - xint, xint}; + amrex::Real sy[] = { amrex::Real(1.) - yint, yint}; + + amrex::Real hlo[4]; + int ilo = 0; + amrex::Real height_at_pxy = 0.; + for (int ii = 0; ii < 2; ++ii) { + for (int jj = 0; jj < 2; ++jj) { + hlo[ilo] = amrex::Real(0.125) + * ( height_arr(i0 + ii , j0 + jj , k ) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj , k ) + + height_arr(i0 + ii , j0 + jj + (!is_nodal[d][1]), k ) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj + (!is_nodal[d][1]), k ) + + height_arr(i0 + ii , j0 + jj , k + (!is_nodal[d][2])) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj , k + (!is_nodal[d][2])) + + height_arr(i0 + ii , j0 + jj + (!is_nodal[d][1]), k + (!is_nodal[d][2])) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj + (!is_nodal[d][1]), k + (!is_nodal[d][2])) + ); + height_at_pxy += hlo[ilo] * sx[ii] * sy[jj]; + ++ilo; + } + } + + int const k0 = (amrex::Real(p.pos(2)) >= height_at_pxy ) ? 
k : k-1; + + // to make z0-z7 concise + int zctr = 0; + amrex::Real ht[8]; + for (int ii = 0; ii < 2; ++ii) { + for (int jj = 0; jj < 2; ++jj) { + for (int kk = 0; kk < 2; ++kk) { + ht[zctr] = amrex::Real(0.125) * + ( height_arr(i0 + ii , j0 + jj , k0 + kk ) + + height_arr(i0 + ii , j0 + jj , k0 + kk + (!is_nodal[d][2])) + + height_arr(i0 + ii , j0 + jj + (!is_nodal[d][1]), k0 + kk ) + + height_arr(i0 + ii , j0 + jj + (!is_nodal[d][1]), k0 + kk + (!is_nodal[d][2])) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj , k0 + kk ) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj , k0 + kk + (!is_nodal[d][2])) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj + (!is_nodal[d][1]), k0 + kk ) + + height_arr(i0 + ii + (!is_nodal[d][0]), j0 + jj + (!is_nodal[d][1]), k0 + kk + (!is_nodal[d][2])) + ); + ++zctr; + }}} + + amrex::Real hint_ilojlo = ( p.pos(2) - ht[0] ) / (ht[1] - ht[0]); + amrex::Real hint_ilojhi = ( p.pos(2) - ht[2] ) / (ht[3] - ht[2]); + amrex::Real hint_ihijlo = ( p.pos(2) - ht[4] ) / (ht[5] - ht[4]); + amrex::Real hint_ihijhi = ( p.pos(2) - ht[6] ) / (ht[7] - ht[6]); + + amrex::Real sz[] = { amrex::Real(1.) - hint_ilojlo, + amrex::Real(1.) - hint_ihijlo, + amrex::Real(1.) - hint_ilojhi, + amrex::Real(1.) - hint_ihijhi, + hint_ilojlo, + hint_ihijlo, + hint_ilojhi, + hint_ihijhi}; +#endif + for (int comp = start_comp; comp < ncomp; ++comp) { + val[ctr] = amrex::ParticleReal(0.); +#if (AMREX_SPACEDIM == 2) + // interpolate 4 points in y + int k0 = 0; + int sy_ctr = 0; + for (int jj = 0; jj <= 1; ++jj) { + for (int ii = 0; ii <=1; ++ii) { + val[ctr] += static_cast( (data_arr[d])(i0+ii, j0+jj, k0 ,comp)*sx[ii]*sy[sy_ctr] ); + ++sy_ctr; + } + } +#elif (AMREX_SPACEDIM == 3) + // not tested yet + int sz_ctr = 0; + for (int kk = 0; kk <= 1; ++kk) { + for (int jj = 0; jj <= 1; ++jj) { + for (int ii = 0; ii <= 1; ++ii) { + val[ctr] += static_cast( + (data_arr[d])(i0+ii, j0+jj, k0 + kk, comp)*sx[ii]*sy[jj]*sz[sz_ctr]); + ++sz_ctr; + } + } + } +#endif + ctr++; + } // ncomp + } // d +#endif +} + + } // namespace amrex #endif // include guard From 0ccffb42ab1d7679496c177106723877d4638bf1 Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan <41089244+RevathiJambunathan@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:01:22 -0800 Subject: [PATCH 100/186] Minor cleanup of commented code and alignment (#3719) ## Summary ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Particle/AMReX_TracerParticle_mod_K.H | 51 ++++++++++++++--------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/Src/Particle/AMReX_TracerParticle_mod_K.H b/Src/Particle/AMReX_TracerParticle_mod_K.H index 472057a408e..82c1cb8c663 100644 --- a/Src/Particle/AMReX_TracerParticle_mod_K.H +++ b/Src/Particle/AMReX_TracerParticle_mod_K.H @@ -29,6 +29,17 @@ void cic_interpolate (const P& p, amrex::GpuArray const& dxi, const amrex::Array4& data_arr, amrex::ParticleReal * val, int M = AMREX_SPACEDIM) +{ + cic_interpolate_cc(p, plo, dxi, data_arr, val, M); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cic_interpolate_cc (const P& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const amrex::Array4& data_arr, + amrex::ParticleReal * val, int M = AMREX_SPACEDIM) { int start_comp 
= 0; int ncomp_per_array = M; @@ -261,18 +272,20 @@ void linear_interpolate_to_particle_z (const P& p, amrex::Real ly = (Real(p.pos(1))-plo[1])*dxi[1] - static_cast(!is_nodal[d][1])*Real(0.5);,); int const i0 = static_cast(amrex::Math::floor(lx)); - int k = 0; // temporary for 2D + int k = 0; #if (AMREX_SPACEDIM == 2) amrex::ignore_unused(ly); int const j = p.idata(0); - amrex::Real hlo_xlo = amrex::Real(0.25) * ( height_arr(i0 , j , k) + amrex::Real hlo_xlo = amrex::Real(0.25) + * ( height_arr(i0 , j , k) + height_arr(i0 + (!(is_nodal[d][0])) , j , k) + height_arr(i0 , j + (!is_nodal[d][1]) , k) + height_arr(i0 + (!(is_nodal[d][0])) , j + (!is_nodal[d][1]) , k) ); - amrex::Real hlo_xhi = 0.25 * ( height_arr(i0 + 1 , j , k ) + amrex::Real hlo_xhi = amrex::Real(0.25) + * ( height_arr(i0 + 1 , j , k ) + height_arr(i0 + 1 + (!(is_nodal[d][0])) , j , k ) + height_arr(i0 + 1 , j + (!is_nodal[d][1]), k ) + height_arr(i0 + 1 + (!(is_nodal[d][0])) , j + (!is_nodal[d][1]), k ) ); @@ -288,7 +301,8 @@ void linear_interpolate_to_particle_z (const P& p, amrex::Real ht[4]; for (int ii=0; ii < 2; ++ii) { for (int jj=0; jj < 2; ++jj) { - ht[yctr] = amrex::Real(0.25) * ( height_arr(i0 + ii , j0 + jj , k ) + ht[yctr] = amrex::Real(0.25) + * ( height_arr(i0 + ii , j0 + jj , k ) + height_arr(i0 + ii + (!(is_nodal[d][0])) , j0 + jj , k ) + height_arr(i0 + ii , j0 + jj + (!is_nodal[d][1]), k ) + height_arr(i0 + ii + (!(is_nodal[d][0])) , j0 + jj + (!is_nodal[d][1]), k ) ); @@ -332,7 +346,6 @@ void linear_interpolate_to_particle_z (const P& p, int const k0 = (amrex::Real(p.pos(2)) >= height_at_pxy ) ? k : k-1; - // to make z0-z7 concise int zctr = 0; amrex::Real ht[8]; for (int ii = 0; ii < 2; ++ii) { @@ -351,24 +364,23 @@ void linear_interpolate_to_particle_z (const P& p, ++zctr; }}} - amrex::Real hint_ilojlo = ( p.pos(2) - ht[0] ) / (ht[1] - ht[0]); - amrex::Real hint_ilojhi = ( p.pos(2) - ht[2] ) / (ht[3] - ht[2]); - amrex::Real hint_ihijlo = ( p.pos(2) - ht[4] ) / (ht[5] - ht[4]); - amrex::Real hint_ihijhi = ( p.pos(2) - ht[6] ) / (ht[7] - ht[6]); - - amrex::Real sz[] = { amrex::Real(1.) - hint_ilojlo, - amrex::Real(1.) - hint_ihijlo, - amrex::Real(1.) - hint_ilojhi, - amrex::Real(1.) - hint_ihijhi, - hint_ilojlo, - hint_ihijlo, - hint_ilojhi, - hint_ihijhi}; + amrex::Real hint_ilojlo = ( p.pos(2) - ht[0] ) / (ht[1] - ht[0]); + amrex::Real hint_ilojhi = ( p.pos(2) - ht[2] ) / (ht[3] - ht[2]); + amrex::Real hint_ihijlo = ( p.pos(2) - ht[4] ) / (ht[5] - ht[4]); + amrex::Real hint_ihijhi = ( p.pos(2) - ht[6] ) / (ht[7] - ht[6]); + + amrex::Real sz[] = { amrex::Real(1.) - hint_ilojlo, + amrex::Real(1.) - hint_ihijlo, + amrex::Real(1.) - hint_ilojhi, + amrex::Real(1.) 
- hint_ihijhi, + hint_ilojlo, + hint_ihijlo, + hint_ilojhi, + hint_ihijhi}; #endif for (int comp = start_comp; comp < ncomp; ++comp) { val[ctr] = amrex::ParticleReal(0.); #if (AMREX_SPACEDIM == 2) - // interpolate 4 points in y int k0 = 0; int sy_ctr = 0; for (int jj = 0; jj <= 1; ++jj) { @@ -378,7 +390,6 @@ void linear_interpolate_to_particle_z (const P& p, } } #elif (AMREX_SPACEDIM == 3) - // not tested yet int sz_ctr = 0; for (int kk = 0; kk <= 1; ++kk) { for (int jj = 0; jj <= 1; ++jj) { From d121723c7491acedf8e2e010cfb1bf3773224b95 Mon Sep 17 00:00:00 2001 From: AlexanderSinn <64009254+AlexanderSinn@users.noreply.github.com> Date: Wed, 24 Jan 2024 00:25:29 +0100 Subject: [PATCH 101/186] Add TypeMultiplier, MakeZeroTuple and IdentityTuple (#3718) ## Summary This PR adds TypeMultiplier and MakeZeroTuple from https://github.com/Hi-PACE/hipace/pull/1052 to AMReX. TypeMultiplier can be used to shorten ReduceOps and ReduceData definitions where the same type is used many times. MakeZeroTuple can be used to initialize a GpuTuple to zero (the default constructor would leave the values uninitialized). IdentityTuple can be used to initialize a GpuTuple to the identity elements of each operation in a ReduceOps. ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [x] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --- Src/Base/AMReX_Reduce.H | 32 +++++++++++++++++++++++-- Src/Base/AMReX_Tuple.H | 14 +++++++++++ Src/Base/AMReX_TypeList.H | 49 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 92 insertions(+), 3 deletions(-) diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index ae8ae3ca0c1..aa1a4dd0122 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -67,14 +67,14 @@ namespace Reduce::detail { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void for_each_init (T& t) + constexpr void for_each_init (T& t) { P().init(amrex::get(t)); } template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void for_each_init (T& t) + constexpr void for_each_init (T& t) { P().init(amrex::get(t)); for_each_init(t); @@ -1275,6 +1275,34 @@ bool AnyOf (Box const& box, P&&pred) #endif +/** + * \brief Return a GpuTuple containing the identity element for each operation in ReduceOps. + * For example 0, +inf and -inf for ReduceOpSum, ReduceOpMin and ReduceOpMax respectively. + */ +template +AMREX_GPU_HOST_DEVICE +constexpr GpuTuple +IdentityTuple (GpuTuple, ReduceOps) noexcept +{ + GpuTuple r{}; + Reduce::detail::for_each_init<0, decltype(r), Ps...>(r); + return r; +} + +/** + * \brief Return a GpuTuple containing the identity element for each ReduceOp in TypeList. + * For example 0, +inf and -inf for ReduceOpSum, ReduceOpMin and ReduceOpMax respectively. + */ +template +AMREX_GPU_HOST_DEVICE +constexpr GpuTuple +IdentityTuple (GpuTuple, TypeList) noexcept +{ + GpuTuple r{}; + Reduce::detail::for_each_init<0, decltype(r), Ps...>(r); + return r; +} + } #endif diff --git a/Src/Base/AMReX_Tuple.H b/Src/Base/AMReX_Tuple.H index 5cdfbee634f..93b3136aa6d 100644 --- a/Src/Base/AMReX_Tuple.H +++ b/Src/Base/AMReX_Tuple.H @@ -378,6 +378,20 @@ ForwardAsTuple (Ts&&... args) noexcept return GpuTuple(std::forward(args)...); } +// MakeZeroTuple + +/** + * \brief Return a GpuTuple containing all zeros. 
+ * Note that a default-constructed GpuTuple can have uninitialized values. + */ +template +AMREX_GPU_HOST_DEVICE +constexpr GpuTuple +MakeZeroTuple (GpuTuple) noexcept +{ + return GpuTuple(static_cast(0)...); +} + } #endif /*AMREX_TUPLE_H_*/ diff --git a/Src/Base/AMReX_TypeList.H b/Src/Base/AMReX_TypeList.H index 3bd0a837069..8aab1184141 100644 --- a/Src/Base/AMReX_TypeList.H +++ b/Src/Base/AMReX_TypeList.H @@ -93,7 +93,7 @@ ForEach (TypeList, F&& f) // dst and src are either MultiFab or fMultiFab auto tt = CartesianProduct(TypeList{}, TypeList{}); - bool r = ForEachUtil(tt, [&] (auto t) -> bool + bool r = ForEachUntil(tt, [&] (auto t) -> bool { using MF0 = TypeAt<0,decltype(t)>; using MF1 = TypeAt<1,decltype(t)>; @@ -151,6 +151,53 @@ constexpr auto CartesianProduct (Ls...) { return (TypeList>{} * ... * Ls{}); } +namespace detail { + // return TypeList by using the fast power algorithm + template + constexpr auto SingleTypeMultiplier_impl () { + if constexpr (N == 0) { + return TypeList<>{}; + } else if constexpr (N == 1) { + return TypeList{}; + } else if constexpr (N % 2 == 0) { + return SingleTypeMultiplier_impl() + SingleTypeMultiplier_impl(); + } else { + return SingleTypeMultiplier_impl() + TypeList{}; + } + } + + // overload of SingleTypeMultiplier for multiple types: + // convert T[N] to T, T, T, T, ... (N times with N >= 1) + template + constexpr auto SingleTypeMultiplier (const T (&)[N]) { + return SingleTypeMultiplier_impl(); + } + + // overload of SingleTypeMultiplier for one regular type + template + constexpr auto SingleTypeMultiplier (T) { + return TypeList{}; + } + + // apply the types of the input TypeList as template arguments to TParam + template