diff --git a/.github/workflows/catalyst.yml b/.github/workflows/catalyst.yml new file mode 100644 index 00000000000..108b76dff10 --- /dev/null +++ b/.github/workflows/catalyst.yml @@ -0,0 +1,33 @@ +name: 🐧 Catalyst + +on: [push, pull_request] + +concurrency: + group: ${{ github.ref }}-${{ github.head_ref }}-insituvis-catalyst + cancel-in-progress: true + +jobs: + catalyst: + name: Catalyst + runs-on: ubuntu-22.04 + if: github.event.pull_request.draft == false + env: + CXX: g++ + CC: gcc + CMAKE_PREFIX_PATH: "/opt/conduit:/opt/catalyst" + container: + image: kitware/paraview:ci-catalyst-amrex-warpx-20240701 + steps: + - uses: actions/checkout@v4 + - name: Configure + run: | + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Debug \ + -DAMReX_ENABLE_TESTS=ON \ + -DAMReX_FORTRAN=OFF \ + -DAMReX_CATALYST=ON \ + -DAMReX_CONDUIT=ON + - name: Build + run: | + cmake --build build -j 4 + diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index c996db63c1c..d02342899b8 100644 --- a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -126,6 +126,53 @@ jobs: ccache -s du -hs ~/.cache/ccache + tests_cxx20: + name: Clang C++20 [tests] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Dependencies + run: | + .github/workflows/dependencies/dependencies.sh + .github/workflows/dependencies/dependencies_clang.sh 15 + .github/workflows/dependencies/dependencies_ccache.sh + - name: Set Up Cache + uses: actions/cache@v4 + with: + path: ~/.cache/ccache + key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }} + restore-keys: | + ccache-${{ github.workflow }}-${{ github.job }}-git- + - name: Build & Install + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} + run: | + export CCACHE_COMPRESS=1 + export CCACHE_COMPRESSLEVEL=10 + export CCACHE_MAXSIZE=100M + export CCACHE_EXTRAFILES=${{ github.workspace }}/.clang-tidy + export CCACHE_LOGFILE=${{ github.workspace }}/ccache.log.txt + ccache -z + + mkdir build + cd build + cmake .. 
\ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DAMReX_EB=ON \ + -DAMReX_ENABLE_TESTS=ON \ + -DAMReX_FORTRAN=OFF \ + -DAMReX_MPI=ON \ + -DCMAKE_CXX_STANDARD=20 \ + -DCMAKE_C_COMPILER=$(which clang-15) \ + -DCMAKE_CXX_COMPILER=$(which clang++-15) \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + make -j 4 + + ctest --output-on-failure + + ccache -s + du -hs ~/.cache/ccache + # Build 2D libamrex with configure configure-2d: name: Clang NOMPI Release [configure 2D] diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 7e4fe8d40bf..5d8c9ad9c30 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -641,7 +641,7 @@ jobs: - name: Run tests run: | cd build - ctest --output-on-failure -R + ctest --output-on-failure test_hdf5: name: GNU@9.3 HDF5 I/O Test [tests] diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 168670eda30..4d1612d0ec4 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -24,7 +24,11 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} + # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array] + # 25 | const char* const kernel_names[] = { + # | ^ + # 1 error generated. + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-zero-length-array"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 @@ -68,7 +72,11 @@ jobs: restore-keys: | ccache-${{ github.workflow }}-${{ github.job }}-git- - name: Build & Install - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} + # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array] + # 25 | const char* const kernel_names[] = { + # | ^ + # 1 error generated. + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-zero-length-array"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 4090f9c2ec0..e0e7bf69118 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -87,16 +87,17 @@ jobs: cmake -S . 
-B build ` -DCMAKE_VERBOSE_MAKEFILE=ON ` + -DCMAKE_BUILD_TYPE=RelWithDebInfo ` -DCMAKE_CXX_FLAGS=" /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR" ` -DAMReX_EB=ON ` -DAMReX_ENABLE_TESTS=ON ` -DAMReX_FORTRAN=OFF ` -DAMReX_MPI=OFF #-DCMAKE_CXX_COMPILER_LAUNCHER=ccache - cmake --build build --config RelWithDebInfo -j 4 + cmake --build build -j 4 - cmake --build build --config RelWithDebInfo --target install - cmake --build build --config RelWithDebInfo --target test_install + cmake --build build --target install + cmake --build build --target test_install #ccache -s diff --git a/CHANGES b/CHANGES index 0c6b093820c..d5e4dc8ab4b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,70 @@ +# 24.08 + + -- Catalyst support (#4011) + + -- Fix typos in FaceDivFree interior interpolation. (#4048) + + -- ParmParse: Read IntVect & RealVect from array (#4050) + ParmParse: Fix assertion in new queryarr for IntVect & RealVect (#4051) + + -- IParser: Use long long int (#4046) + + -- ParmParse: Math expression support (#4032) + + -- AmrData: remove dependency on Fortran (#4049) + + -- Remove reliance on managed memory in HDF5 mesh IO (#4047) + + -- New Feature in MLEBNodeFDLaplacian: MultiFab sigma coefficient (#3968) + + -- Nodal Solver: Use multi-color Gauss-Seidel on GPU (#4043) + + -- Use reduction to compute min and max particle distances in NeighborParticles test. (#3212) + + -- ParmParse: Refactoring II (#4035) + + -- Fix deadlock for CUDA (#4044) + + -- ParmParse: Refactoring (#4031) + + -- MLMG: Keep ghost cell values in user's inputs (#4040) + + -- Geometry: Fix operator>> (#4042) + + -- FillPatch for Face Data: Fix Geometry used in creating mask (#4039) + + -- Parser: Allow dots in variable names (#4038) + + -- HDF5: Remove unnecessary casting (#4036) + + -- HDF5: Fix chunking warning (#4033) (#4034) + + -- New parameter amrex::init_snan (#4030) + + -- Change the default max_grid_size in 3D on GPU from 32 to 64 (#4029) + + -- GpuArray: add operator+= (#4028) + + -- FPE traps: Add functions for enabling and disabling FPE traps (#4025) + + -- HDF5: Correctly determine max_mfi_count (#4024) + + -- HDF5: expose ZLIB compression without SZ or ZFP flags (#4015) + + -- Hypre: Use Gpu::hypreSynchronize (#4022) + + -- AmrMesh: add const to a few functions (#4021) + + -- Convexify AMR data (#4013) + + -- Fix BottomSolver::bicgcg (#4020) + + -- Fix some warnings issued by Coverity (#4017) + + -- convert IndexType to IndexTypeND (#3988) + + -- Workaround MSVC issue in tupleToArray (#4000) + # 24.07 -- HDF5: Correctly handle writing only some components when writing Particle (#4005) diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst index 5887df56fb5..97e68fd1e68 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -224,17 +224,17 @@ file is a series of definitions in the form of ``prefix.name = value value ....`` For each line, text after # are comments. Here is an example inputs file. - .. highlight:: python +.. 
highlight:: python - :: +:: - nsteps = 100 # integer - nsteps = 1000 # nsteps appears a second time - dt = 0.03 # floating point number - ncells = 128 64 32 # a list of 3 ints - xrange = -0.5 0.5 # a list of 2 reals - title = "Three Kingdoms" # a string - hydro.cfl = 0.8 # with prefix, hydro + nsteps = 100 # integer + nsteps = 1000 # nsteps appears a second time + dt = 0.03 # floating point number + ncells = 128 64 32 # a list of 3 ints + xrange = -0.5 0.5 # a list of 2 reals + title = "Three Kingdoms" # a string + hydro.cfl = 0.8 # with prefix, hydro The following code shows how to use :cpp:`ParmParse` to get/query the values. @@ -275,6 +275,200 @@ by default returns the last one. The difference between :cpp:`query` and get the value, whereas :cpp:`query` returns an error code without generating a runtime error that will abort the run. +Math Expressions +---------------- + +.. versionadded:: 24.08 + Math expression support in :cpp:`ParmParse`. + +:cpp:`ParmParse` supports math expressions for integers and floating point +numbers. For example, + +.. highlight:: python + +:: + + # three numbers. whitespaces inside `""` are okay. + f = 3+4 99 "5 + 6" + + # two numbers. `\` is for continuation + g = 3.1+4.1 \ + 5.0+6.6 + + # two numbers unless using [query|get]WithParser + w = 1 -2 + + my_constants.alpha = 5. + amrex.c = c + + # must use [query|get]WithParser + amrex.foo = sin( pi/2 ) + alpha + -amrex.c**2/c^2 + + # either [query|get] or [query|get]WithParser is okay + amrex.bar = sin(pi/2)+alpha+-amrex.c**2/c^2 + + geom.prob_lo = 2*sin(pi/4)/sqrt(2) sin(pi/2)+cos(pi/2) -(sin(pi*3/2)+cos(pi*3/2)) + + # three numbers. `\` is for continuation + geom.prob_hi = "2*sin(pi/4)/sqrt(2)" \ + "sin(pi/2) + cos(pi/2)" \ + -(sin(pi*3/2)+cos(pi*3/2)) + +can be processed by + +.. highlight:: c++ + +:: + + { + ParmParse::SetParserPrefix("physical_constants"); + ParmParse pp("physical_constants"); + pp.add("c", 299792458.); + pp.add("pi", 3.14159265358979323846); + } + { + ParmParse pp; + + double f0 = -1; + pp.query("f", f0); + std::cout << " double f = " << f0 << '\n'; + + std::vector f; + pp.queryarr("f", f); + std::cout << " int f[3] = {" << f[0] << ", " << f[1] << ", " + << f[2] << "}\n"; + + std::vector g; + pp.queryarr("g", g); + std::cout << " double g[] = " << g[0] << " " << g[1] << '\n'; + + double w; + pp.query("w", w); + std::cout << " w = " << w << " with query\n"; + pp.queryWithParser("w", w); + std::cout << " w = " << w << " with queryParser\n"; + } + { + ParmParse pp("amrex", "my_constants"); + double foo = -1, bar; + pp.getWithParser("foo", foo); + pp.get("bar", bar); + std::cout << " foo = " << foo << ", bar = " << bar << '\n'; + } + { + ParmParse pp; + std::array prob_lo, prob_hi; + pp.get("geom.prob_lo", prob_lo); + pp.get("geom.prob_hi", prob_hi); + std::cout << " double prob_lo[] = {" << prob_lo[0] << ", " + << prob_lo[1] << ", " << prob_lo[2] << "}\n" + << " double prob_hi[] = {" << prob_hi[0] << ", " + << prob_hi[1] << ", " << prob_hi[2] << "}\n"; + } + +The results will be + +.. highlight:: console + +:: + + double f = 7 + int f[3] = {7, 99, 11} + double g[] = 7.2 11.6 + w = 1 with query + w = -1 with queryParser + foo = 5, bar = 5 + double prob_lo[] = {1, 1, 1} + double prob_hi[] = {1, 1, 1} + +Note that the empty spaces are significant for math expressions unless they +are inside a pair of ``"`` or explicitly parsed by +:cpp:`ParmParse::queryWithParser` or :cpp:`ParmParse::getWithParser`. If the +expression contains another variable, it will be looked up by +:cpp:`ParmParse`. 
:cpp:`ParmParse`'s constructor accepts an optional second +argument, ``parser_prefix``. When a variable in a math expression is being +looked up, it will first try to find it by using the exact name of the +variable. If this attempt fails and the :cpp:`ParmParse` object has a +non-empty non-static member ``parser_prefix``, it will try again, this time +looking up the variable by prefixing its name with the value of +``parser_prefix`` followed by a ``.``. If this attempt also fails and the +:cpp:`ParmParse` class has a non-empty static member ``ParserPrefix`` (which +can be set by :cpp:`ParmParse::SetParserPrefix`), it will try again, this +time looking up the variable by prefixing its name with the value of +``ParserPrefix`` followed by a ``.``. + +The variables in :cpp:`ParmParse` math expressions are not evaluated until +they are referenced. If a variable is defined multiple times, the last +occurrence will override previous ones even if it appears after the variable +has been referenced. This behavior is demonstrated in the following example. + +.. highlight:: python + +:: + + foo.a = 1 + foo.b = foo.a + foo.a = 2 + +will become + +.. highlight:: python + +:: + + foo.a = 2 + foo.b = 2 + +Enum Class +---------- + +.. versionadded:: 24.09 + Enum class support in :cpp:`ParmParse`. + +AMReX provides a macro :cpp:`AMREX_ENUM` for defining :cpp:`enum class` that +supports reflection. For example, + +.. highlight:: c++ + +:: + + AMREX_ENUM(MyColor, red, green, blue); + + void f () + { + MyColor color = amrex::getEnum("red"); // MyColor::red + std::string name = amrex::getEnumNameString(MyColor::blue); // "blue" + std::vector names = amrex::getEnumNameStrings(); + // names = {"red", "green", "blue"}; + std::string class_name = amrex::getEnumClassName(); // "MyColor" + } + +This allows us to read :cpp:`ParmParse` parameters into enum class objects. + +.. highlight:: python + +:: + + color1 = red + color2 = BLue + +The following code shows how to query the enumerators. + +.. highlight:: c++ + +:: + + AMREX_ENUM(MyColor, none, red, green, blue); + + void f (MyColor& c1, MyColor& c2) + { + ParmParse pp; + pp.query("color1", c1); // c1 becomes MyColor::red + pp.query_enum_case_insensitive("color2", c2); // c2 becomes MyColor::blue + MyColor default_color; // MyColor::none + pp.query("color3", default_color); // Still MyColor::none + } + Overriding Parameters with Command-Line Arguments ------------------------------------------------- @@ -453,7 +647,14 @@ Besides :cpp:`amrex::Parser` for floating point numbers, AMReX also provides similarity, but floating point number specific functions (e.g., ``sqrt``, ``sin``, etc.) are not supported in ``IParser``. In addition to ``/`` whose result truncates towards zero, the integer parser also supports ``//`` whose -result truncates towards negative infinity. +result truncates towards negative infinity. Single quotes ``'`` are allowed +as a separator for :cpp:`IParser` numbers just like C++ integer +literals. Additionally, a floating point like number with a positive +exponent may be accepted as an integer if it is reasonable to do so. For +example, it's okay to have ``1.234e3``, but ``1.234e2`` is an error. + + .. versionadded:: 24.08 + Support for ``'`` and ``e`` in :cpp:`IParser` integers. .. _sec:basics:initialize: @@ -912,7 +1113,7 @@ an :cpp:`int` pointer or array specifying periodicity. 
If a :cpp:`RealBox` is no given in the first constructor, AMReX will construct one based on :cpp:`ParmParse` parameters, ``geometry.prob_lo`` / ``geometry.prob_hi`` / ``geometry.prob_extent``, where each of the parameter is an array of ``AMREX_SPACEDIM`` real numbers. -See the section on :ref:`sec:inputs:pd` for more details about how to specify these. +See the section on :ref:`sec:inputs:geom` for more details about how to specify these. The argument for coordinate system is an integer type with valid values being 0 (Cartesian), or 1 (cylindrical), or 2 (spherical). If it diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst index 3176730320e..90fb4d6eb30 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst @@ -508,6 +508,8 @@ The list of available options is reported in the :ref:`table ` bel +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ | AMReX_CONDUIT | Enable Conduit support | NO | YES, NO | +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ + | AMReX_CATALYST | Enable Catalyst support | NO | YES, NO | + +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ | AMReX_ASCENT | Enable Ascent support | NO | YES, NO | +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ | AMReX_HYPRE | Enable HYPRE interfaces | NO | YES, NO | diff --git a/Docs/sphinx_documentation/source/IO.rst b/Docs/sphinx_documentation/source/IO.rst index 60fb0568851..185b48955d7 100644 --- a/Docs/sphinx_documentation/source/IO.rst +++ b/Docs/sphinx_documentation/source/IO.rst @@ -156,7 +156,7 @@ Currently supported compression libraries include `SZ`_ and `ZFP`_. To enable HDF5 output, AMReX must be compiled and linked to an HDF5 library with parallel I/O support, by adding ``USE_HDF5=TRUE`` and ``HDF5_HOME=/path/to/hdf5/install/dir`` to the GNUMakefile. -many HPC systems have an HDF5 module available that can be loaded with +Many HPC systems have an HDF5 module available that can be loaded with ``module load hdf5`` or ``module load cray-hdf5-parallel``. To download and compile HDF5 from source code, please go to `HDF5 Download`_ webpage and follow the instructions (latest version is recommended and remember @@ -199,7 +199,7 @@ chapter on :ref:`Chap:Visualization`) HDF5 Plotfile Compression ------------------------- -To enable data compression on the HDF5 datasets, the corresponding compression +To enable SZ or ZFP data compression on the HDF5 datasets, the corresponding compression library and its HDF5 plugin must be available. To compile `SZ`_ or `ZFP`_ plugin, please refer to their documentation: `H5Z-SZ`_ and `H5Z-ZFP`_, and adding ``USE_HDF5_SZ=TRUE``, ``SZ_HOME=``, or ``USE_HDF5_ZFP=TRUE``, ``ZFP_HOME=``, @@ -210,12 +210,18 @@ please refer to their documentation: `H5Z-SZ`_ and `H5Z-ZFP`_, and adding .. _`H5Z-SZ`: https://github.com/szcompressor/SZ/tree/master/hdf5-filter/H5Z-SZ .. _`H5Z-ZFP`: https://github.com/LLNL/H5Z-ZFP +ZLIB compression is available without external libraries or other make flags. +Different compression levels (at the cost of read/write time) can be used, just +like GZIP. 
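As a concrete illustration, here is a minimal sketch of requesting ZLIB compression when writing a single-level HDF5 plotfile (the available compression strings are listed below). It assumes one of the two HDF5 plotfile-writing functions referred to above, taken here to be :cpp:`WriteSingleLevelPlotfileHDF5` with the compression string following the step number, and a build with ``USE_HDF5=TRUE``; the header name, field name, and compression level are illustrative only.

.. highlight:: c++

::

    #include <AMReX_Geometry.H>
    #include <AMReX_MultiFab.H>
    #include <AMReX_Vector.H>
    #include <AMReX_PlotFileUtilHDF5.H>

    void write_compressed_plotfile (const amrex::MultiFab& mf,
                                    const amrex::Geometry& geom,
                                    amrex::Real time, int step)
    {
        amrex::Vector<std::string> varnames{"phi"};
        // "ZLIB@6" requests ZLIB compression at level 6; higher levels trade
        // write time for smaller files, analogous to gzip levels.
        amrex::WriteSingleLevelPlotfileHDF5("plt_hdf5", mf, varnames, geom,
                                            time, step, "ZLIB@6");
    }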
+ The string argument :cpp:`compression` in the above two functions controls whether to enable data compression and its parameters. Currently supported options include: * No compression * ``None@0`` +* ZLIB compression + * ``ZLIB@compression_level`` * SZ compression * ``SZ@/path/to/sz.config`` * ZFP compression @@ -224,6 +230,9 @@ options include: * ``ZFP_ACCURACY@accuracy`` * ``ZFP_REVERSIBLE@reversible`` +Using compression requires data to be stored in a chunked format. The size of these +chunks can (and generally should) be configured by changing the ``HDF5_CHUNK_SIZE`` +environment variable, with a default value of 1024 elements provided. HDF5 Asynchronous Output ------------------------ diff --git a/Docs/sphinx_documentation/source/InputsCheckpoint.rst b/Docs/sphinx_documentation/source/InputsCheckpoint.rst deleted file mode 100644 index 13bae0e467f..00000000000 --- a/Docs/sphinx_documentation/source/InputsCheckpoint.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. _Chap:InputsCheckpoint: - -Checkpoint/Restart -================== - -The following inputs must be preceded by "amr" and control checkpoint/restart. - -+------------------+-----------------------------------------------------------------------+-------------+-----------+ -| | Description | Type | Default | -+==================+=======================================================================+=============+===========+ -| restart | If present, then the name of file to restart from | String | None | -+------------------+-----------------------------------------------------------------------+-------------+-----------+ -| check_int | Frequency of checkpoint output; | Int | -1 | -| | if -1 then no checkpoints will be written | | | -+------------------+-----------------------------------------------------------------------+-------------+-----------+ -| check_file | Prefix to use for checkpoint output | String | chk | -+------------------+-----------------------------------------------------------------------+-------------+-----------+ - diff --git a/Docs/sphinx_documentation/source/InputsComputeBackends.rst b/Docs/sphinx_documentation/source/InputsComputeBackends.rst deleted file mode 100644 index 26e5d527508..00000000000 --- a/Docs/sphinx_documentation/source/InputsComputeBackends.rst +++ /dev/null @@ -1,21 +0,0 @@ -.. _Chap:InputsComputeBackends: - -Compute Backends -================ - -The following inputs must be preceded by ``amrex.`` and determine runtime options of CPU or GPU compute implementations. - -+------------------------+-----------------------------------------------------------------------+-------------+------------+ -| Parameter | Description | Type | Default | -+========================+=======================================================================+=============+============+ -| ``omp_threads`` | If OpenMP is enabled, this can be used to set the default number of | String | ``system`` | -| | threads. The special value ``nosmt`` can be used to avoid using | or Int | | -| | threads for virtual cores (aka Hyperthreading or SMT), as is default | | | -| | in OpenMP, and instead only spawns threads equal to the number of | | | -| | physical cores in the system. | | | -| | For the values ``system`` and ``nosmt``, the environment variable | | | -| | ``OMP_NUM_THREADS`` takes precedence. For Integer values, | | | -| | ``OMP_NUM_THREADS`` is ignored. 
| | | -+------------------------+-----------------------------------------------------------------------+-------------+------------+ - -For GPU-specific parameters, see also the :ref:`GPU chapter `. diff --git a/Docs/sphinx_documentation/source/InputsLoadBalancing.rst b/Docs/sphinx_documentation/source/InputsLoadBalancing.rst deleted file mode 100644 index ea7ff2ebaae..00000000000 --- a/Docs/sphinx_documentation/source/InputsLoadBalancing.rst +++ /dev/null @@ -1,56 +0,0 @@ -.. _Chap:InputsLoadBalancing: - -Gridding and Load Balancing -=========================== - -The following inputs must be preceded by "amr" and determine how we create the grids and how often we regrid. - -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| Parameter | Description | Type | Default | -+========================+=======================================================================+=============+===========+ -| regrid_int | How often to regrid (in number of steps at level 0) | Int | -1 | -| | if regrid_int = -1 then no regridding will occur | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| max_grid_size_x | Maximum number of cells at level 0 in each grid in x-direction | Int | 32 | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| max_grid_size_y | Maximum number of cells at level 0 in each grid in y-direction | Int | 32 | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| max_grid_size_z | Maximum number of cells at level 0 in each grid in z-direction | Int | 32 | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| blocking_factor_x | Each grid must be divisible by blocking_factor_x in x-direction | Int | 8 | -| | (must be 1 or power of 2) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| blocking_factor_y | Each grid must be divisible by blocking_factor_y in y-direction | Int | 8 | -| | (must be 1 or power of 2) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| blocking_factor_z | Each grid must be divisible by blocking_factor_z in z-direction | Int | 8 | -| | (must be 1 or power of 2) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| refine_grid_layout | Split grids in half until the number of grids is no less than the | Bool | true | -| | number of procs. (Will be overridden if refine_grid_layout_[x,y,z] | | | -| | is specified) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| refine_grid_layout_x | Allow grids to be split in the x-dimension when refining the layout. | Int | 1 | -| | (1 to allow or 0 to disallow) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| refine_grid_layout_y | Allow grids to be split in the y-dimension when refining the layout. 
| Int | 1 | -| | (1 to allow or 0 to disallow) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ -| refine_grid_layout_z | Allow grids to be split in the z-dimension when refining the layout. | Int | 1 | -| | (1 to allow or 0 to disallow) | | | -+------------------------+-----------------------------------------------------------------------+-------------+-----------+ - -The following inputs must be preceded by "particles". - -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ -| Parameter | Description | Type | Default | -+===================+=======================================================================+=============+===========+ -| max_grid_size_x | Maximum number of cells at level 0 in each grid in x-direction | Int | 32 | -| | for grids in the ParticleBoxArray if dual_grid is true | | | -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ -| max_grid_size_y | Maximum number of cells at level 0 in each grid in y-direction | Int | 32 | -| | for grids in the ParticleBoxArray if dual_grid is true | | | -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ -| max_grid_size_z | Maximum number of cells at level 0 in each grid in z-direction | Int | 32 | -| | for grids in the ParticleBoxArray if dual_grid is true. | | | -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ diff --git a/Docs/sphinx_documentation/source/InputsPlotFiles.rst b/Docs/sphinx_documentation/source/InputsPlotFiles.rst deleted file mode 100644 index 9e8789a90ac..00000000000 --- a/Docs/sphinx_documentation/source/InputsPlotFiles.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. _Chap:InputsPlotfiles: - -Plotfiles and Other Output -========================== - -The following inputs must be preceded by "amr" and control the frequency and naming of plotfile generation, as well -as whether a plotfile should be written out immediately after restarting a simulation. - -+---------------------+-----------------------------------------------------------------------+-------------+-----------+ -| | Description | Type | Default | -+=====================+=======================================================================+=============+===========+ -| plot_int | Frequency of plotfile output; | Int | -1 | -| | if -1 then no plotfiles will be written | | | -+---------------------+-----------------------------------------------------------------------+-------------+-----------+ -| plotfile_on_restart | Should we write a plotfile when we restart (only used if plot_int>0) | Bool | 0 (false) | -+---------------------+-----------------------------------------------------------------------+-------------+-----------+ -| plot_file | Prefix to use for plotfile output | String | plt | -+---------------------+-----------------------------------------------------------------------+-------------+-----------+ diff --git a/Docs/sphinx_documentation/source/InputsProblemDefinition.rst b/Docs/sphinx_documentation/source/InputsProblemDefinition.rst deleted file mode 100644 index 3e67ffdb38e..00000000000 --- a/Docs/sphinx_documentation/source/InputsProblemDefinition.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. 
_sec:inputs:pd: - -Problem Definition -================== - -The following inputs must be preceded by "amr." - -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ -| | Description | Type | Default | -+===================+=======================================================================+=============+===========+ -| n_cell | Number of cells at level 0 in each coordinate direction | Int Int Int | None | -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ -| max_level | Maximum level of refinement allowed (0 when single-level) | Int | None | -+-------------------+-----------------------------------------------------------------------+-------------+-----------+ - -The following inputs must be preceded by "geometry." - -+-----------------+-----------------------------------------------------------------------+-------------+-----------+ -| | Description | Type | Default | -+=================+=======================================================================+=============+===========+ -| coord_sys | 0 for Cartesian | Int | 0 | -+-----------------+-----------------------------------------------------------------------+-------------+-----------+ -| is_periodic | 1 for true, 0 for false (one value for each coordinate direction) | Ints | 0 0 0 | -+-----------------+-----------------------------------------------------------------------+-------------+-----------+ -| prob_lo | Low corner of physical domain (physical not index space) | Reals | 0 0 0 | -+-----------------+-----------------------------------------------------------------------+-------------+-----------+ -| prob_hi | High corner of physical domain (physical not index space) | Reals | None | -+-----------------+-----------------------------------------------------------------------+-------------+-----------+ -| prob_extent | Extent of physical domain (physical not index space) | Reals | None | -+-----------------+-----------------------------------------------------------------------+-------------+-----------+ - -Note that internally ``prob_lo`` and ``prob_hi`` are the variables carried by the ``Geometry`` class. -In the inputs file (or command line), one can specify -1) ``geometry.prob_hi`` only or -2) ``geometry.prob_extent`` only or -3) ``geometry.prob_lo`` and ``geometry.prob_hi`` or -4) ``geometry.prob_lo`` and ``geometry.prob_extent``. -If ``geometry.prob_lo`` is not specified then it will be 0 in each coordinate direction. -If ``geometry.prob_extent`` is specified (and ``geometry.prob_hi`` is not) then internally -"prob_hi" will be set to "prob_lo" + "prob_extent". - diff --git a/Docs/sphinx_documentation/source/InputsTimeStepping.rst b/Docs/sphinx_documentation/source/InputsTimeStepping.rst deleted file mode 100644 index 144ef0fe6dd..00000000000 --- a/Docs/sphinx_documentation/source/InputsTimeStepping.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. sec:InputsTimeStepping: - -Time Stepping -============= - -The following inputs must be preceded by "amr." Note that if both are specified, both criteria -are used and the simulation still stop when the first criterion is hit. In the case of unsteady flow, -the simulation will stop when either the number of steps reaches max_step or time reaches stop_time. -In the case of unsteady flow, the simulation will stop when either the tolerance (difference between -subsequent steps) is reached or the number of iterations reaches the maximum number specified. 
-
-+------------------+-----------------------------------------------------------------------+-------------+-----------+
-| | Description | Type | Default |
-+==================+=======================================================================+=============+===========+
-| max_step | Maximum number of time steps to take | Int | -1 |
-+------------------+-----------------------------------------------------------------------+-------------+-----------+
-| stop_time | Maximum time to reach | Real | -1.0 |
-+------------------+-----------------------------------------------------------------------+-------------+-----------+
diff --git a/Docs/sphinx_documentation/source/Inputs_Chapter.rst b/Docs/sphinx_documentation/source/Inputs_Chapter.rst
deleted file mode 100644
index 43ead40b3c6..00000000000
--- a/Docs/sphinx_documentation/source/Inputs_Chapter.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-.. _Chap:Inputs:
-
-Run-time Inputs
-===============
-
-.. toctree::
-   :maxdepth: 1
-
-   InputsProblemDefinition
-   InputsTimeStepping
-   InputsLoadBalancing
-   InputsComputeBackends
-   InputsPlotFiles
-   InputsCheckpoint
-
diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst b/Docs/sphinx_documentation/source/LinearSolvers.rst
index ab0ba3506a9..ee3ebe0efcf 100644
--- a/Docs/sphinx_documentation/source/LinearSolvers.rst
+++ b/Docs/sphinx_documentation/source/LinearSolvers.rst
@@ -568,6 +568,19 @@ residual correction form of the original problem. To build Hypre, follow the nex
     5.- Create an environment variable with the HYPRE directory --
         HYPRE_DIR=/hypre_path/hypre/src/hypre
 
+To use Hypre with CUDA, the nvcc compiler is needed along with all the other requirements for CPU builds (e.g., gcc, mpicc). It is very important that the GPU architecture used to build Hypre matches that of AMReX. By default, Hypre assumes compute capability 70, so it is best to build Hypre for multiple architectures by specifying multiple compute capability numbers (e.g., 80 and 90).
+
+::
+
+    1.- git clone https://github.com/hypre-space/hypre.git
+    2.- cd hypre/src
+    3.- ./configure --with-cuda --with-gpu-arch='80 90'
+        (you can determine the GPU architecture from the command line using
+        nvidia-smi --query-gpu=compute_cap --format=csv; if it reports 9.0, the gpu-arch is 90)
+    4.- make install
+    5.- Create an environment variable with the HYPRE directory --
+        HYPRE_DIR=/hypre_path/hypre/src/hypre
+
 To use hypre, one must include ``amrex/Src/Extern/HYPRE`` in the build
 system. For examples of using hypre, we refer the reader to
 `ABecLaplacian`_ or `NodeTensorLap`_.
diff --git a/Docs/sphinx_documentation/source/RuntimeParameters.rst b/Docs/sphinx_documentation/source/RuntimeParameters.rst
new file mode 100644
index 00000000000..c57c74c8fb1
--- /dev/null
+++ b/Docs/sphinx_documentation/source/RuntimeParameters.rst
@@ -0,0 +1,1238 @@
+
+.. _chap:inputs:
+
+Runtime Parameters
+==================
+
+.. role:: cpp(code)
+   :language: c++
+
+This chapter contains a list of AMReX :cpp:`ParmParse` runtime parameters
+and their **default** values. They can be set by including them in an
+inputs file, by specifying them on the command line, or by passing a
+function to :cpp:`amrex::Initialize` that adds parameters to AMReX's
+:cpp:`ParmParse` parameter database. For more information on
+:cpp:`ParmParse`, see :ref:`sec:basics:parmparse`.
+
+..
important:: AMReX reserves the following prefixes in :cpp:`ParmParse` + parameters: ``amr``, ``amrex``, ``blprofiler``, ``device``, + ``DistributionMapping``, ``eb2``, ``fab``, ``fabarray``, + ``geometry``, ``particles``, ``tiny_profiler``, and + ``vismf``. + +AMR +--- + +AMReX applications with AMR use either :cpp:`class AmrCore` or the more +specialized :cpp:`class Amr`. Since :cpp:`class Amr` is derived from +:cpp:`class AmrCore`, the parameters for the :cpp:`AmrCore` class also apply +to the :cpp:`Amr` class. Additionally, :cpp:`class AmrCore` is derived from +:cpp:`class AmrMesh`, so :cpp:`AmrMesh` member functions are also available +to :cpp:`AmrCore` and :cpp:`Amr`. + +AmrCore Class +^^^^^^^^^^^^^ + +Below are a list of important :cpp:`ParmParse` parameters. However, AMReX +applications can choose to avoid them entirely by use this :cpp:`AMRCore` +constructor :cpp:`AmrCore(Geometry const& level_0_geom, AmrInfo const& +amr_info)`, where :cpp:`struct AmrInfo` contains all the information that +can be set via :cpp:`ParmParse`. + +.. py:data:: amr.verbose + :type: int + :value: 0 + + This controls the verbosity level of :cpp:`AmrCore` functions. + +.. py:data:: amr.n_cell + :type: int array + :value: [none] + + This parameter is used only when ``n_cell`` is not provided as an + argument to :cpp:`AmrCore` constructors. It specifies the number of cells + in each dimension on Level 0. + +.. py:data:: amr.max_level + :type: int + :value: [none] + + This parameter is used only when ``max_level`` is not provided as an + argument to :cpp:`AmrCore` constructors. It specifies the maximum level + of refinement allowed. Note that the total number of levels, including + the base level 0, is ``max_level+1``. + +.. py:data:: amr.ref_ratio + :type: int array + :value: 2 2 2 ... 2 + + If the refinement ratio is not provided as an argument to :cpp:`AmrCore` + constructors and :py:data:`amr.ref_ratio_vect` is not found in the + :cpp:`ParmParse` database, this parameter will be used to set the + refinement ratios between AMR levels. If there are more AMR levels than + the size of the integer parameter array, the last integer will be used as + the refinement ratio for the unspecified levels. For example, if + ``max_level`` is 4 and the provided ``amr.ref_ratio`` parameter is ``2 + 4``, the refinement ratios are 2, 4, 4 and 4, for levels 0/1, 1/2, 2/3 + and 3/4, respectively. + +.. py:data:: amr.ref_ratio_vect + :type: int array + :value: [none] + + If the refinement ratio is not provided as an argument to :cpp:`AmrCore` + constructors and :py:data:`amr.ref_ratio_vect` is found in the + :cpp:`ParmParse` database, it will be used to set the refinement ratios + between AMR levels. It's an error if the size of the integer array, if + found, is less than ``max_level*AMREX_SPACEDIM``. The first + ``AMREX_SPACEDIM`` numbers specify the refinement ratios in the + ``AMREX_SPACEDIM`` dimensions between levels 0 and 1, the next + ``AMREX_SPACEDIM`` numbers specify the ratios for levels 1 and 2, and so + on. + +.. py:data:: amr.max_grid_size + :type: int array + :value: [build dependent] + + This controls the maximum grid size on AMR levels, one value for each + level. If the size of the integer array is less than the total number of + levels, the last integer will be used for the unspecified levels. The + default value is 128 for 1D and 2D runs. For 3D runs, the default value + is 64 and 32, for GPU and CPU runs, respectively. 
Note that the user can + also call :cpp:`AmrMesh::SetMaxGridSize` to set the maximum grid + sizes. Additionally, the values set by this parameter can be overridden + by :py:data:`amr.max_grid_size_x`, :py:data:`amr.max_grid_size_y` and + :py:data:`amr.max_grid_size_z`. + +.. py:data:: amr.max_grid_size_x + :type: int array + :value: [none] + + If provided, this will override the maximum grid size in the x-direction + set by :py:data:`amr.max_grid_size`. If the size of the integer array is + less than the total number of levels, the last integer will be used for + the unspecified levels. + +.. py:data:: amr.max_grid_size_y + :type: int array + :value: [none] + + If provided, this will override the maximum grid size in the y-direction + set by :py:data:`amr.max_grid_size`. If the size of the integer array is + less than the total number of levels, the last integer will be used for + the unspecified levels. + +.. py:data:: amr.max_grid_size_z + :type: int array + :value: [none] + + If provided, this will override the maximum grid size in the z-direction + set by :py:data:`amr.max_grid_size`. If the size of the integer array is + less than the total number of levels, the last integer will be used for + the unspecified levels. + +.. py:data:: amr.blocking_factor + :type: int array + :value: [build dependent] + + This controls the blocking factor on AMR levels, one value for each + level. If the size of the integer array is less than the total number of + levels, the last integer will be used for the unspecified levels. The + default value is 8. Note that the user can also call + :cpp:`AmrMesh::SetBlockingFactor` to set the blocking + factors. Additionally, the values set by this parameter can be overridden + by :py:data:`amr.blocking_factor_x`, :py:data:`amr.blocking_factor_y` and + :py:data:`amr.blocking_factor_z`. + +.. py:data:: amr.blocking_factor_x + :type: int array + :value: [none] + + If provided, this will override the blocking factor in the x-direction + set by :py:data:`amr.blocking_factor`. If the size of the integer array + is less than the total number of levels, the last integer will be used + for the unspecified levels. + +.. py:data:: amr.blocking_factor_y + :type: int array + :value: [none] + + If provided, this will override the blocking factor in the y-direction + set by :py:data:`amr.blocking_factor`. If the size of the integer array + is less than the total number of levels, the last integer will be used + for the unspecified levels. + +.. py:data:: amr.blocking_factor_z + :type: int array + :value: [none] + + If provided, this will override the blocking factor in the z-direction + set by :py:data:`amr.blocking_factor`. If the size of the integer array + is less than the total number of levels, the last integer will be used + for the unspecified levels. + +.. py:data:: amr.n_proper + :type: int + :value: 1 + + This parameter controls the proper nesting of grids on AMR levels. For + example, if we have ``blocking_factor = 8``, ``ref_ratio = 2`` and + ``n_proper = 1``, there will be at least ``8/2*1 = 4`` coarse level cells + outside the fine level grids except at the physical boundaries. Note that + the user can also call :cpp:`AmrMesh::SetNProper(int)` to set the proper + nesting parameter. + +.. py:data:: amr.grid_eff + :type: amrex::Real + :value: 0.7 + + This parameter controls the grid efficiency threshold during grid + creation. 
While a higher value can enhance efficiency, it may negatively + impact overall performance, especially for GPU runs, because it tends to + create smaller grids. Note that the user can also call + :cpp:`AmrMesh::SetGridEff(Real)` to set the grid efficiency threshold. + +.. py:data:: amr.n_error_buf + :type: int array + :value: 1 1 1 ... 1 + + This parameter controls how many extra cells will be tagged around every + tagged cell. For example, if ``n_error_buf = 2``, tagging cell + ``(i,j,k)`` will result in the tagging of the region of from lower corner + ``(i-2,j-2,k-2)`` to upper corner ``(i+2,j+2,k+2)``. If the size of the + integer array is less than the number of levels, the last integer will be + used for the unspecified levels. Note that the values set by this + parameter can be overridden by :py:data:`amr.n_error_buf_x`, + :py:data:`amr.n_error_buf_y` and :py:data:`amr.n_error_buf_z`. + + +.. py:data:: amr.n_error_buf_x + :type: int array + :value: [none] + + This parameter controls the error buffer size in the x-direction. If the + size of the integer array is less than the number of levels, the last + integer will be used for the unspecified levels. + +.. py:data:: amr.n_error_buf_y + :type: int array + :value: [none] + + This parameter controls the error buffer size in the y-direction. If the + size of the integer array is less than the number of levels, the last + integer will be used for the unspecified levels. + +.. py:data:: amr.n_error_buf_z + + This parameter controls the error buffer size in the z-direction. If the + size of the integer array is less than the number of levels, the last + integer will be used for the unspecified levels. + +.. py:data:: amr.refine_grid_layout + :type: bool + :value: true + + If it's true, AMReX will attempt to chop new grids into smaller chunks + ensuring at least one grid per MPI process, provided this does not + violate the blocking factor constraint. + +.. py:data:: amr.refine_grid_layout_x + :type: bool + :value: [none] + + This parameter, if found, will override the + :py:data:`amrex.refine_grid_layout` parameter in the x-direction. + +.. py:data:: amr.refine_grid_layout_y + :type: bool + :value: [none] + + This parameter, if found, will override the + :py:data:`amrex.refine_grid_layout` parameter in the y-direction. + +.. py:data:: amr.refine_grid_layout_z + :type: bool + :value: [none] + + This parameter, if found, will override the + :py:data:`amrex.refine_grid_layout` parameter in the z-direction. + +.. py:data:: amr.check_input + :type: bool + :value: true + + If this is true, AMReX will check if the various parameters in + :cpp:`AmrMesh` are reasonable. + +Amr Class +^^^^^^^^^ + +.. warning:: These parameters are specific to :cpp:`class Amr` based + applications. If your application use :cpp:`class AmrCore` + directly, they do not apply unless you have provided + implementations for them. + +Subcycling +"""""""""" + +.. py:data:: amr.subcycling_mode + :type: string + :value: Auto + + This controls the subcycling mode of :cpp:`class Amr`. Possible value + are ``None`` for no subcycling, or ``Auto`` for subcycling. + +Regrid +"""""" + +.. py:data:: amr.regrid_int + :type: int array + :value: 1 1 1 ... 1 + + This controls how often we perform the regrid operation on AMR levels 0 + to ``max_level-1``. If the parameter is a single value, it will be used + on all levels. If the parameter is an array of more than one values, the + size must be at least ``max_level`` and values after the first + ``max_level`` elements are ignored. + +.. 
py:data:: amr.regrid_on_restart + :type: bool + :value: false + + This controls whether we perform regrid immediately after restart. + +.. py:data:: amr.force_regrid_level_zero + :type: bool + :value: false + + This controls whether we perform regrid on level 0. + +.. py:data:: amr.compute_new_dt_on_regrid + :type: bool + :value: false + + This controls whether we re-compute ``dt`` after regrid. + +.. py:data:: amr.initial_grid_file + :type: string + :value: [none] + + If this is set, the initial grids will be read from the specified file. + +.. py:data:: amr.regrid_file + :type: string + :value: [none] + + If this is set, regrid will use the grids in the specified file. + +I/O +""" + +.. py:data:: amr.restart + :type: string + :value: [none] + + If this is set, the simulation will restart from the specified checkpoint + file. + +.. py:data:: amr.plotfile_on_restart + :type: bool + :value: false + + If this is set to true, a plotfile will be written after restart. + +.. py:data:: amr.file_name_digits + :type: int + :value: 5 + + This parameter specifies the minimum number of digits in checkpoint and + plotfile names. + +.. py:data:: amr.checkpoint_files_output + :type: bool + :value: true + + This controls whether we write checkpoint files. + +.. py:data:: amr.check_file + :type: string + :value: chk + + This sets the "root" of checkpoint file names. For example, the + checkpoint files are named ``chk00000``, ``chk001000``, etc. by default. + +.. py:data:: amr.check_int + :type: int + :value: -1 + + This controls the interval of writing checkpoint files, defined as the + number of level 0 steps between each checkpoint. A value less than 1 + indicates no checkpoint files will be written. + +.. py:data:: amr.check_per + :type: amrex::Real + :value: -1 + + This controls the interval of writing checkpoint files, defined as the + time (not the wall time) elapsed between each checkpoint. A value less + or equal to 0 indicates no checkpoint files will be written. + +.. py:data:: amr.checkpoint_nfiles + :type: int + :value: 64 + + This is the maximum number of binary files per :cpp:`MultiFab` when + writing checkpoint files. + +.. py:data:: amr.plot_files_output + :type: bool + :value: true + + This controls whether we write plot files. + +.. py:data:: amr.plot_file + :type: string + :value: plt + + This sets the "root" of plot file names. For example, the plot files are + named ``plt00000``, ``plt001000``, etc. by default. + +.. py:data:: amr.plot_int + :type: int + :value: -1 + + This controls the interval of writing plot files, defined as the number + of level 0 steps between each plot file. A value less than 1 indicates no + plot files will be written. + +.. py:data:: amr.plot_per + :type: amrex::Real + :value: -1 + + This controls the interval of writing plot files, defined as the time + (not the wall time) elapsed between each plot file. A value less or equal + to 0 indicates no plot files will be written. + +.. py:data:: amr.plot_log_per + :type: amrex::Real + :value: -1 + + This controls the interval of writing plot files, defined as the + ``log10`` time (not the wall time) elapsed between each plot file. A + value less or equal to 0 indicates no plot files will be written. + +.. py:data:: amr.plot_max_level + :type: int + :value: amr.max_level + + This controls the finest level in a plot file. For example, if the finest + level in a run is 3, but this parameter is set to 1, only levels 0 and 1 + will be saved in a plot file. + +.. 
py:data:: amr.plot_nfiles + :type: int + :value: 64 + + This is the maximum number of binary files per :cpp:`MultiFab` when + writing plot files. + +.. py:data:: amr.plot_vars + :type: string array + :value: [none] + + If this parameter is set, the variables specified in the string array + will be the state variables saved in the plot files. The special values + ``ALL`` and ``NONE`` mean that all or none of the state variables will be + saved. If this parameter is not set, all state variables will be saved. + +.. py:data:: amr.derive_plot_vars + :type: string array + :value: [none] + + If this parameter is set, the variables specified in the string array + will be the derive variables saved in the plot files. The special values + ``ALL`` and ``NONE`` mean that all or none of the derive variables will + be saved. If this parameter is not set, none of the derive variables will + be saved. + +.. py:data:: amr.small_plot_file + :type: string + :value: smallplt + + This sets the "root" of small plot file names. For example, the small + plot files are named ``smallplt00000``, ``smallplt001000``, etc. by + default. + +.. py:data:: amr.small_plot_int + :type: int + :value: -1 + + This controls the interval of writing small plot files, defined as the + number of level 0 steps between each small plot file. A value less than 1 + indicates no small plot files will be written. + +.. py:data:: amr.small_plot_per + :type: amrex::Real + :value: -1 + + This controls the interval of writing small plot files, defined as the + time (not the wall time) elapsed between each small plot file. A value + less or equal to 0 indicates no small plot files will be written. + +.. py:data:: amr.small_plot_log_per + :type: amrex::Real + :value: -1 + + This controls the interval of writing small plot files, defined as the + ``log10`` time (not the wall time) elapsed between each small plot + file. A value less or equal to 0 indicates no small plot files will be + written. + +.. py:data:: amr.small_plot_vars + :type: string array + :value: [none] + + If this parameter is set, the variables specified in the string array + will be the state variables saved in the small plot files. The special + values ``ALL`` and ``NONE`` mean that all or none of the state variables + will be saved. If this parameter is not set, none of the state variables + will be saved. + +.. py:data:: amr.derive_small_plot_vars + :type: string array + :value: [none] + + If this parameter is set, the variables specified in the string array + will be the derive variables saved in the small plot files. The special + values ``ALL`` and ``NONE`` mean that all or none of the derive variables + will be saved. If this parameter is not set, none of the derive variables + will be saved. + +.. py:data:: amr.message_int + :type: int + :value: 10 + + This controls the interval of checking messages during a run, defined as + the number of level 0 steps between checks. A value less than 1 indicates + no checking will be performed. A message refers to a file created by the + user on the disk, where only the file name is checked, not its + content. If the file name matches one of the following predefined names, + appropriate actions will be taken. + + dump_and_continue + Make a checkpoint file and continue running the simulation. + + stop_run + Stop the simulation. + + dump_and_stop + Make a checkpoint file and stop the simulation. + + plot_and_continue + Make a plot file and continue running the simulation. 
+ + small_plot_and_continue + Make a small plot file and continue running the simulation. + +.. py:data:: amr.write_plotfile_with_checkpoint + :type: bool + :value: true + + This parameter is for the message action discussed in + :py:data:`amr.message_int`. It controls whether an action will make a + plot file as well when asked to make a checkpoint file. + +.. py:data:: amr.run_log + :type: string + :value: [none] + + If this parameter is set, the run log will be enabled and this is the log + file name. + +.. py:data:: amr.run_log_terse + :type: string + :value: [none] + + If this parameter is set, the terse run log will be enabled and this is + the log file name. + +.. py:data:: amr.grid_log + :type: string + :value: [none] + + If this parameter is set, the grid log will be enabled and this is the + log file name. + +.. py:data:: amr.data_log + :type: string + :value: [none] + + If this parameter is set, the data log will be enabled and this is the + log file name. + +Basic Controls +-------------- + +.. py:data:: amrex.verbose + :type: int + :value: 1 + + This controls the verbosity level of AMReX. Besides using + :cpp:`ParmParse`, you can also call :cpp:`amrex::SetVerbose(int)` to set + it. + +.. py:data:: amrex.init_snan + :type: bool + :value: [build dependent] + + This controls whether :cpp:`MultiFab`, :cpp:`FArrayBox`, + :cpp:`BaseFab`, :cpp:`PODVectors`, + :cpp:`Gpu::DeviceVector`, etc. will be initialized to + signaling NaNs at construction. The default value is true for debug + builds. For non-debug builds, the default is false unless ``TEST=TRUE`` + for GNU Make or ``AMReX_TESTING`` is enabled for CMake. + +.. py:data:: amrex.abort_on_unused_inputs + :type: bool + :value: false + + If this is true and there are unused :cpp:`ParmParse` parameters, AMReX + will abort during :cpp:`amrex::Finalize`. + +.. py:data:: amrex.parmparse.verbose + :type: int + :value: amrex.verbose + + If this is greater than zero, unused :cpp:`ParmParse` variables will be + printed out during :cpp:`amrex::Finalize` or + :cpp:`ParmParse::QueryUnusedInputs`. The parameter can also be set by + calling :cpp:`amrex::ParmParse::SetVerbose(int)`. + +.. py:data:: amrex.device.verbose + :type: int + :value: 0 + + This controls whether AMReX prints out GPU device properties such name, + vendor, total memory size, etc. This is only relevant for GPU runs. + +.. py:data:: amrex.max_gpu_streams + :type: int + :value: 4 + + This controls the number of GPU streams used by AMReX. It's only relevant + for GPU runs. + +.. py:data:: amrex.omp_threads + :type: string + :value: system + + If OpenMP is enabled, this can be used to set the default number of + threads. Possible values are ``system``, ``nosmt``, or an integer + string. The special value ``nosmt`` can be used to avoid using threads + for virtual cores (aka Hyperthreading or SMT), as is default in OpenMP, + and instead only spawns threads equal to the number of physical cores in + the system. For the values ``system`` and ``nosmt``, the environment + variable ``OMP_NUM_THREADS`` takes precedence. If the string can be + converted to an integer, ``OMP_NUM_THREADS`` is ignored. + +.. py:data:: amrex.memory_log + :type: string + :value: memlog + + This is the name of the memory log file when memory profiling is enabled. + +Communication +------------- + +.. py:data:: amrex.use_gpu_aware_mpi + :type: bool + :value: false + + For GPU runs, this controls the memory type used for AMReX's + communication buffers. 
When this is true, AMReX uses GPU device memory + for communication data in MPI function calls. When this is false, the + data are placed in pinned memory. Note that this flag does not enable + GPU-aware MPI by itself. Enabling GPU-aware MPI is system + dependent. Users should consult their system's documentation for + instructions on setting up the environment and linking to GPU-aware MPI + libraries. + +Distribution Mapping +-------------------- + +.. py:data:: DistributionMapping.verbose + :type: int + :value: 0 + + This controls the verbosity level of :cpp:`DistributionMapping` + functions. + +.. py:data:: DistributionMapping.strategy + :type: string + :value: SFC + + This is the default :cpp:`DistributionMapping` strategy. Possible values + are ``SFC``, ``KNAPSACK``, ``ROUNDROBIN``, or ``RRSFC``. Note that the + default strategy can also be set by calling + :cpp:`DistributionMapping::strategy(DistributionMapping::Strategy)`. + +Embedded Boundary +----------------- + +.. py:data:: eb2.max_grid_size + :type: int + :value: 64 + + This parameter specifies the maximum grid size in AMReX's internal EB + database, not the user's data. + +.. py:data:: eb2.extend_domain_face + :type: bool + :value: true + + This controls the behavior of the embedded boundary outside the + domain. If this is true, the embedded boundary outside the domain is + extended perpendicularly from the domain face. Otherwise, it's generated + with the user provided implicit function. Note that this parameter can be + overridden by the user when calling :cpp:`amrex::EB2::Build` with the + optional parameter ``bool extend_domain_face``. + +.. py:data:: eb2.num_coarsen_opt + :type: int + :value: 0 + + If it is greater than 0, this parameter can speed up the EB + generation. It indicates that the search for EB can be performed on grids + coarsened by this factor and then the EB information details will be + generated on the original grids. However, the user should be aware that + setting this parameter too high could result in erroneous results. Also + note that this parameter can be overridden by the user when calling + :cpp:`amrex::EB2::Build` with the optional parameter ``int + num_coarsen_opt``. + +.. py:data:: eb2.geom_type + :type: string + :value: [none] + + There are two versions of the `amrex::EB2::Build` function that can be + used to build EB. One version is a function template that takes a user + provided :cpp:`GeometryShop`, while the other uses :cpp:`ParmParse` + parameters to build EB. For the latter version, this parameter specifies + the type of the EB. Possible values include the following. + + all_regular + The entire domain is regular without any EB objects. + + parser + The embedded boundary is describe by :py:data:`eb2.parser_function`. + + stl + The embedded boundary will be built using an STL file specified by + :py:data:`eb2.stl_file`. + +.. py:data:: eb2.parser_function + :type: string + :value: [none] + + When ``eb2.geom_type = parser``, this parameter is a parser function + string that contains a math expression describing the surface of the EB. + + .. seealso:: Section :ref:`sec:basics:parser`. + +.. py:data:: eb2.stl_file + :type: string + :value: [none] + + When ``eb2.geom_type = stl``, this is a required string parameter + specifying the STL file name. + +.. py:data:: eb2.stl_scale + :type: amrex:Real + :value: 1 + + When building EB using STL, the triangles in the STL file will be scaled + by the given value of this optional parameter. + +.. 
+
+.. py:data:: eb2.stl_file
+   :type: string
+   :value: [none]
+
+   When ``eb2.geom_type = stl``, this is a required string parameter
+   specifying the STL file name.
+
+.. py:data:: eb2.stl_scale
+   :type: amrex::Real
+   :value: 1
+
+   When building EB using STL, the triangles in the STL file will be scaled
+   by the given value of this optional parameter.
+
+.. py:data:: eb2.stl_center
+   :type: amrex::Real array
+   :value: 0 0 0
+
+   When building EB using STL, this optional parameter specifies the shifted
+   center. The original coordinates in the STL file will be shifted by the
+   provided values.
+
+.. py:data:: eb2.stl_reverse_normal
+   :type: bool
+   :value: false
+
+   When building EB using STL, the normal direction of the triangles in the
+   STL file will be reversed if this optional parameter is set to true.
+
+.. py:data:: eb2.small_volfrac
+   :type: amrex::Real
+   :value: [depends on the type of amrex::Real]
+
+   This parameter specifies the threshold for small cells that will be
+   converted to covered cells. The default value is ``1.e-14`` if
+   :cpp:`amrex::Real` is ``double``, or ``1.e-5`` if :cpp:`amrex::Real` is
+   ``float``.
+
+.. py:data:: eb2.cover_multiple_cuts
+   :type: bool
+   :value: false
+
+   If this parameter is set to true, multi-cut cells will be converted to
+   covered cells.
+
+   .. tip:: Because AMReX currently does not support multi-cut cells, it is a
+      runtime error if multi-cut cells are left unfixed.
+
+.. py:data:: eb2.maxiter
+   :type: int
+   :value: 32
+
+   Fixing small and multi-cut cells is an iterative process. This parameter
+   specifies the maximum number of iterations for the fix-up process.
+
+Error Handling
+--------------
+
+By default AMReX installs a signal handler that will be run when a signal
+such as a segfault is received. You can also enable floating point exception
+trapping. The signal handler will print out backtraces that can be useful
+for debugging.
+
+.. note:: Floating point exception trapping is not enabled by default,
+   because compilers might generate optimized SIMD code that raises the
+   exceptions.
+
+.. py:data:: amrex.signal_handling
+   :type: bool
+   :value: true
+
+   This controls whether AMReX should handle signals.
+
+.. py:data:: amrex.handle_sigsegv
+   :type: bool
+   :value: true
+
+   If both this flag and ``amrex.signal_handling`` are true, ``SIGSEGV``
+   will be handled by AMReX.
+
+.. py:data:: amrex.handle_sigterm
+   :type: bool
+   :value: false
+
+   If both this flag and ``amrex.signal_handling`` are true, ``SIGTERM``
+   will be handled by AMReX. This flag is false by default because this
+   could generate lots of backtrace files on some batch systems that issue
+   ``SIGTERM`` for jobs running out of wall clock time.
+
+.. py:data:: amrex.handle_sigint
+   :type: bool
+   :value: true
+
+   If both this flag and ``amrex.signal_handling`` are true, ``SIGINT``
+   will be handled by AMReX.
+
+.. py:data:: amrex.handle_sigabrt
+   :type: bool
+   :value: true
+
+   If both this flag and ``amrex.signal_handling`` are true, ``SIGABRT``
+   will be handled by AMReX.
+
+.. py:data:: amrex.handle_sigfpe
+   :type: bool
+   :value: true
+
+   If both this flag and ``amrex.signal_handling`` are true, ``SIGFPE``
+   will be handled by AMReX.
+
+   .. seealso::
+      Use :py:data:`amrex.fpe_trap_invalid`, :py:data:`amrex.fpe_trap_zero`
+      and :py:data:`amrex.fpe_trap_overflow` to enable ``FE_INVALID``,
+      ``FE_DIVBYZERO`` and ``FE_OVERFLOW`` trapping, respectively.
+
+.. py:data:: amrex.handle_sigill
+   :type: bool
+   :value: true
+
+   If both this flag and ``amrex.signal_handling`` are true, ``SIGILL``
+   will be handled by AMReX.
+
+.. py:data:: amrex.throw_exception
+   :type: bool
+   :value: false
+
+   If this flag is true and ``amrex.signal_handling`` is false,
+   :cpp:`amrex::Abort` and :cpp:`amrex::Error` will throw
+   :cpp:`std::runtime_error` instead of aborting immediately. Note that
+   according to the C++ standard, if an exception is thrown and not caught,
+   :cpp:`std::terminate` will be called.
+
+.. py:data:: amrex.fpe_trap_invalid
+   :type: bool
+   :value: false
+
+   If ``SIGFPE`` is handled by AMReX and this flag is true, ``FE_INVALID``
+   (e.g., ``0/0``) trapping will be enabled. This flag has no effect on
+   Windows.
+
+.. py:data:: amrex.fpe_trap_zero
+   :type: bool
+   :value: false
+
+   If ``SIGFPE`` is handled by AMReX and this flag is true,
+   ``FE_DIVBYZERO`` (e.g., ``1/0``) trapping will be enabled. This flag has
+   no effect on Windows.
+
+.. py:data:: amrex.fpe_trap_overflow
+   :type: bool
+   :value: false
+
+   If ``SIGFPE`` is handled by AMReX and this flag is true, ``FE_OVERFLOW``
+   (i.e., the result is too large to be representable) trapping will be
+   enabled. This flag has no effect on Windows.
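+
+As a minimal sketch, trapping of all three floating point exceptions can be
+requested in an inputs file like this (assuming ``amrex.handle_sigfpe`` is
+left at its default value of true)::
+
+   amrex.fpe_trap_invalid  = 1
+   amrex.fpe_trap_zero     = 1
+   amrex.fpe_trap_overflow = 1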
+
+Extern
+------
+
+Hypre
+^^^^^
+
+These parameters are relevant only when Hypre support is enabled.
+
+.. py:data:: amrex.init_hypre
+   :type: bool
+   :value: true
+
+   This controls whether AMReX should call ``HYPRE_Init()`` during
+   :cpp:`amrex::Initialize`.
+
+.. py:data:: amrex.hypre_spgemm_use_vendor
+   :type: bool
+   :value: false
+
+   This controls whether HYPRE should use the vendor's ``SpGemm``
+   functionality.
+
+.. py:data:: amrex.hypre_spmv_use_vendor
+   :type: bool
+   :value: false
+
+   This controls whether HYPRE should use the vendor's ``SpMV``
+   functionality.
+
+.. py:data:: amrex.hypre_sptrans_use_vendor
+   :type: bool
+   :value: false
+
+   This controls whether HYPRE should use the vendor's ``SpTrans``
+   functionality.
+
+.. _sec:inputs:geom:
+
+Geometry
+--------
+
+All these parameters are optional for constructing a :ref:`Geometry `
+object. They are only used if the information is not provided via function
+arguments.
+
+.. py:data:: geometry.coord_sys
+   :type: int
+   :value: 0
+
+   This specifies the coordinate system type, with valid values being 0
+   (Cartesian), 1 (cylindrical), or 2 (spherical).
+
+.. py:data:: geometry.prob_lo
+   :type: amrex::Real array
+   :value: 0 0 0
+
+   This specifies the position of the lower corner of the physical domain.
+
+.. py:data:: geometry.prob_hi
+   :type: amrex::Real array
+   :value: [none]
+
+   This specifies the position of the upper corner of the physical
+   domain. If this is provided, :py:data:`geometry.prob_extent` will be
+   ignored.
+
+.. py:data:: geometry.prob_extent
+   :type: amrex::Real array
+   :value: [none]
+
+   This specifies the length of the physical domain. If
+   :py:data:`geometry.prob_hi` is provided, this will be ignored.
+
+.. py:data:: geometry.is_periodic
+   :type: int array
+   :value: 0 0 0
+
+   These integer parameters are boolean flags to indicate whether the domain
+   is periodic in each direction. It's considered true (i.e., periodic) if
+   its value is non-zero, and false (i.e., non-periodic) if its value is
+   zero.
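+
+For example, a minimal sketch of these inputs for a unit-cube domain that is
+periodic only in the z-direction (the values are purely illustrative) would
+be::
+
+   geometry.coord_sys   = 0          # Cartesian
+   geometry.prob_lo     = 0.0 0.0 0.0
+   geometry.prob_hi     = 1.0 1.0 1.0
+   geometry.is_periodic = 0   0   1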
+
+I/O
+---
+
+.. py:data:: amrex.async_out
+   :type: bool
+   :value: false
+
+   If this is true, AMReX's native mesh and particle plotfiles will be
+   written asynchronously by a background thread.
+
+.. py:data:: amrex.async_out_nfiles
+   :type: int
+   :value: 64
+
+   This is the maximum number of binary files on each AMR level that will be
+   used when AMReX writes a plotfile asynchronously.
+
+.. py:data:: vismf.verbose
+   :type: int
+   :value: 0
+
+   This controls the verbosity level of :cpp:`VisMF` functions.
+
+Memory
+------
+
+.. py:data:: amrex.the_arena_init_size
+   :type: long
+   :value: [system dependent]
+
+   This controls the main memory arena's initial size in bytes. For CPU
+   runs, the default is 0, whereas for GPU runs, the default is set at run
+   time to 3/4 of the system's device memory.
+
+   .. tip:: Since ``amrex v24.08``, instead of
+      ``amrex.the_arena_init_size=10000000000``, one can use
+      ``amrex.the_arena_init_size=10'000'000'000`` or
+      ``amrex.the_arena_init_size=1e10`` to set :cpp:`ParmParse`
+      integer parameters like this one.
+
+.. py:data:: amrex.the_device_arena_init_size
+   :type: long
+   :value: 8388608 [8 MB]
+
+   This controls the GPU device arena's initial size in bytes. For CPU runs,
+   this is ignored. If the main arena uses the device memory (as opposed to
+   managed memory), this parameter is also ignored.
+
+.. py:data:: amrex.the_managed_arena_init_size
+   :type: long
+   :value: 8388608 [8 MB]
+
+   This controls the managed device arena's initial size in bytes. For CPU
+   runs, this is ignored. If the main arena uses the managed memory (as
+   opposed to device memory), this parameter is also ignored.
+
+.. py:data:: amrex.the_pinned_arena_init_size
+   :type: long
+   :value: [system dependent]
+
+   This controls the pinned host memory arena's initial size in bytes. The
+   default is 8 MB for CPU runs. For GPU runs it's set to half of the GPU
+   device memory by default.
+
+.. py:data:: amrex.the_comms_arena_init_size
+   :type: long
+   :value: 8388608 [8 MB]
+
+   This controls the MPI communication memory arena's initial size in bytes.
+
+.. py:data:: amrex.the_arena_release_threshold
+   :type: long
+   :value: LONG_MAX
+
+   This controls the release threshold of the main arena.
+
+.. py:data:: amrex.the_device_arena_release_threshold
+   :type: long
+   :value: LONG_MAX
+
+   This controls the release threshold of the device arena.
+
+.. py:data:: amrex.the_managed_arena_release_threshold
+   :type: long
+   :value: LONG_MAX
+
+   This controls the release threshold of the managed arena.
+
+.. py:data:: amrex.the_pinned_arena_release_threshold
+   :type: long
+   :value: LONG_MAX
+
+   This controls the release threshold of the pinned arena.
+
+.. py:data:: amrex.the_comms_arena_release_threshold
+   :type: long
+   :value: LONG_MAX
+
+   This controls the release threshold of the communication arena.
+
+.. py:data:: amrex.the_async_arena_release_threshold
+   :type: long
+   :value: LONG_MAX
+
+   This controls the release threshold of the asynchronous arena. Note that
+   this is only relevant for the CUDA (>= 11.2) and HIP backends that
+   support a stream-ordered memory allocator.
+
+.. py:data:: amrex.the_arena_is_managed
+   :type: bool
+   :value: false
+
+   This controls whether AMReX uses managed memory for the main arena. This
+   is only relevant for GPU runs.
+
+.. py:data:: amrex.abort_on_out_of_gpu_memory
+   :type: bool
+   :value: false
+
+   This controls whether AMReX should simply abort when the reported free
+   device memory is less than the amount an arena is asked to allocate. Note
+   that for managed memory it's possible to allocate more than the amount of
+   free device memory available. However, the code will be very slow. This
+   parameter is only relevant for GPU runs.
+
+.. py:data:: amrex.mf.alloc_single_chunk
+   :type: bool
+   :value: false
+
+   This controls whether all the data in a :cpp:`FabArray` (including
+   :cpp:`MultiFab`) are stored in a single contiguous chunk of memory.
+
+.. py:data:: amrex.vector_growth_factor
+   :type: amrex::Real
+   :value: 1.5
+
+   This controls the growth factor of :cpp:`amrex::PODVector` and its
+   derived classes such as :cpp:`amrex::Gpu::DeviceVector`,
+   :cpp:`amrex::Gpu::ManagedVector`, etc.
A smaller value can avoid wasting + memory, but it may result in a performance penalty during resizing. + +Particles +--------- + +.. py:data:: particles.do_tiling + :type: bool + :value: false + + This controls whether tiling is enabled for particle containers. + +.. py:data:: particles.tile_size + :type: int array + :value: 1024000 8 8 + + When tiling is enabled, this is the default tile size. Note that a big + number like 1024000 effectively turns tiling off in that direction. + +.. py:data:: particles.do_mem_efficient_sort + :type: bool + :value: true + + This parameter controls whether the more memory efficient method will be + used for sorting particles. + +.. py:data:: particles.particles_nfiles + :type: int + :value: 256 + + This is the maximum number of binary files per level for a particle + container when writing checkpoint and plot files for particles. The + special value of ``-1`` indicates one file per process. + +Tiling +------ + +.. py:data:: fabarray.mfiter_tile_size + :type: int array + :value: [build dependent] + + This is the default size for :ref:`tiling `. For GPU + runs, it is disabled by default. For CPU runs, it is disabled by default + in 1D and 2D, but enabled in 3D with a tile size of 8 in the y and + z-directions. + +.. py:data:: fabarray.comm_tile_size + :type: int array + :value: [build dependent] + + This is the default tiling size used in moving data in and out of the MPI + communication buffer . It is disabled by default for GPU runs, but + enabled for CPU runs with a tile size of 8 in the y and z-directions (if + they exist). + +Tiny Profiler +------------- + +These parameters are ignored unless profiling with :cpp:`TinyProfiler` is +enabled. + +.. py:data:: tiny_profiler.verbose + :type: int + :value: 0 + + If this value is greater than 0, messages about entering or leaving + profiled regions will be printed on the I/O process. + +.. py:data:: tiny_profiler.print_threshold + :type: double + :value: 1.0 + + In the profiling report, regions with very small run times are not listed + individually. Instead, they are included in a section named "Other". This + parameter specifies the maximum inclusive run time that the "Other" + section can take in percent relative to the total run time. + +.. py:data:: tiny_profiler.device_synchronize_around_region + :type: bool + :value: false + + This parameter is only relevant for GPU runs. If it is set to true, the + current GPU stream is synchronized when entering and leaving a profiling + region. Because GPU kernels are asynchronous, time measurements without + synchronization could be misleading. Enabling this parameter can provide + more accurate measurements. However, the added synchronization points, + which are unnecessary for correctness, could potentially degrade the + performance. + +.. py:data:: tiny_profiler.enabled + :type: bool + :value: true + + .. versionadded:: 24.09 + Runtime parameter `tiny_profiler.enabled``. + + This parameter can be used to disable tiny profiling including + :cpp:`CArena` memory profiling at run time. + +.. py:data:: tiny_profiler.memprof_enabled + :type: bool + :value: true + + .. versionadded:: 24.09 + Runtime parameter ``tiny_profiler.memprof_enabled``. + + This parameter can be used to disable :cpp:`CArena` memory profiling at + run time. If ``tiny_profiler.enabled`` is false, this parameter has no + effects. + +.. py:data:: tiny_profiler.output_file + :type: string + :value: [empty] + + .. versionadded:: 24.09 + Runtime parameter ``tiny_profiler.output_file``. 
+ + If this parameter is empty, the output of tiny profiling is dumped on the + default out stream of AMReX. If it's not empty, it specifies the file + name for the output. Note that ``/dev/null`` is a special name that mean + a null file. diff --git a/Docs/sphinx_documentation/source/TimeIntegration_Chapter.rst b/Docs/sphinx_documentation/source/TimeIntegration_Chapter.rst index 720c312e2db..ef311de0439 100644 --- a/Docs/sphinx_documentation/source/TimeIntegration_Chapter.rst +++ b/Docs/sphinx_documentation/source/TimeIntegration_Chapter.rst @@ -16,13 +16,8 @@ A Simple Time Integrator Setup ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is best shown with some sample code that sets up a time integrator and -asks it to step forwards by some interval ``dt``. The user needs to supply at -minimum, the right-hand side function using the ``TimeIntegrator::set_rhs()`` -function. By using the ``TimeIntegrator::set_post_update()`` function, a user -can also supply a post update function which is called on state data immediately -before evaluating the right-hand side. This post update function is a good -opportunity to fill boundary conditions for Runge-Kutta stage solution data so that -ghost cells are filled when the right hand side function is called on that solution data. +asks it to step forward by some interval ``dt``. The user needs to supply the +right-hand side function using the ``TimeIntegrator::set_rhs()`` function. .. highlight:: c++ @@ -30,171 +25,26 @@ ghost cells are filled when the right hand side function is called on that solut #include - MultiFab Sborder; // MultiFab containing old-time state data and ghost cells - MultiFab Snew; // MultiFab where we want new-time state data - Geometry geom; // The domain (or level) geometry + MultiFab Sold; // MultiFab containing old-time state data + MultiFab Snew; // MultiFab where we want new-time state data - // [Fill Sborder here] + // [Fill Sold here] // Create a time integrator that will work with // MultiFabs with the same BoxArray, DistributionMapping, // and number of components as the state_data MultiFab. - TimeIntegrator integrator(Sborder); + TimeIntegrator integrator(Sold); - // Create a RHS source function we will integrate - auto source_fun = [&](MultiFab& rhs, const MultiFab& state, const Real time){ - // User function to calculate the rhs MultiFab given the state MultiFab - fill_rhs(rhs, state, time); + // Create a function that fills the state BCs and computes the RHS + auto rhs_fun = [&](MultiFab& rhs, MultiFab& state, const Real time){ + // [Calculate the rhs MultiFab given the state MultiFab] }; - // Create a function to call after updating a state - auto post_update_fun = [&](MultiFab& S_data, const Real time) { - // Call user function to update state MultiFab, e.g. fill BCs - post_update(S_data, time, geom); - }; - - // Attach the right hand side and post-update functions - // to the integrator + // Attach the right hand side function to the integrator integrator.set_rhs(source_fun); - integrator.set_post_update(post_update_fun); - - // integrate forward one step from `time` by `dt` to fill S_new - integrator.advance(Sborder, S_new, time, dt); - -.. _sec:time_int:sundials: - -Using SUNDIALS -^^^^^^^^^^^^^^ - -The AMReX Time Integration interface also supports a SUNDIALS backend that -wraps both the explicit Runge-Kutta (ERK) and multirate (MRI) integration -schemes in the SUNDIALS ARKODE package. To use either of them, the user needs -to compile AMReX with `USE_SUNDIALS=TRUE` and use SUNDIALS v. 6.0 or later. 
- -There are only minor changes to the code above required to use the SUNDIALS -interface. The first change is that the integration datatype is now a -`Vector` type instead of simply `MultiFab`. The reason for -introducing a `Vector` in this case, is to permit integrating state -data with different spatial centering (e.g. cell centered, face centered, node -centered) concurrently. Shown here is sample code equivalent to the code above, -suitable for the SUNDIALS explicit Runge-Kutta integrator: - -.. highlight:: c++ - -:: - - #include - - Vector Sborder; // MultiFab(s) containing old-time state data and ghost cells - Vector Snew; // MultiFab(s) where we want new-time state data - Geometry geom; // The domain (or level) geometry - - // [Fill Sborder here] - - // Create a time integrator that will work with - // MultiFabs with the same BoxArray, DistributionMapping, - // and number of components as the state_data MultiFab. - TimeIntegrator > integrator(Sborder); - - // Create a RHS source function we will integrate - auto source_fun = [&](Vector& rhs, const Vector& state, const Real time){ - // User function to calculate the rhs MultiFab given the state MultiFab - fill_rhs(rhs, state, time); - }; - - // Create a function to call after updating a state - auto post_update_fun = [&](Vector& S_data, const Real time) { - // Call user function to update state MultiFab, e.g. fill BCs - post_update(S_data, time, geom); - }; - - // Attach the right hand side and post-update functions - // to the integrator - integrator.set_rhs(source_fun); - integrator.set_post_update(post_update_fun); - - // integrate forward one step from `time` by `dt` to fill S_new - integrator.advance(Sborder, S_new, time, dt); - -Afterwards, to select the ERK integrator, one needs only to add the following -two input parameters at runtime: - -:: - - integration.type = SUNDIALS - integration.sundials.strategy = ERK - -If instead one wishes to use the SUNDIALS multirate integrator, then the user -will need to use the following runtime inputs parameters: - -:: - - integration.type = SUNDIALS - integration.sundials.strategy = MRI - -In addition, to set up the multirate problem, the user needs to supply a fast -timescale right-hand-side function in addition to the usual right hand side -function (which is interpreted as the slow timescale right-hand side). The user -will also need to supply the ratio of the slow timestep size to the fast -timestep size, which is an integer corresponding to the number of fast -timesteps the integrator will take per every slow timestep. An example code -snippet would look as follows: - -.. highlight:: c++ - -:: - - #include - - Vector Sborder; // Vector of MultiFab(s) containing old-time state data and ghost cells - Vector Snew; // Vector of MultiFab(s) where we want new-time state data - Geometry geom; // The domain (or level) geometry - - // [Fill Sborder here] - - // Create a time integrator that will work with - // MultiFabs with the same BoxArray, DistributionMapping, - // and number of components as the state_data MultiFab. 
- TimeIntegrator > integrator(Sborder); - - // Create a slow timescale RHS function we will integrate - auto rhs_fun = [&](Vector& rhs, const Vector& state, const Real time){ - // User function to calculate the rhs MultiFab given the state MultiFab(s) - fill_rhs(rhs, state, time); - }; - - // Create a fast timescale RHS function to integrate - auto rhs_fun_fast = [&](Vector& rhs, - const Vector& stage_data, - const Vector& state, const Real time) { - // User function to calculate the fast-timescale rhs MultiFab given - // the state MultiFab and stage_data which holds the previously - // accessed slow-timescale stage state data. - fill_fast_rhs(rhs, stage_data, state, time); - }; - - // The post update function is called after updating state data or - // immediately before using state data to calculate a fast or slow right hand side. - // (it is a good place to e.g. fill boundary conditions) - auto post_update_fun = [&](Vector& S_data, const Real time) { - // Call user function to update state MultiFab(s), e.g. fill BCs - post_update(S_data, time, geom); - }; - - // Attach the slow and fast right hand side functions to integrator - integrator.set_rhs(rhs_fun); - integrator.set_fast_rhs(rhs_fun_fast); - - // This sets the ratio of slow timestep size to fast timestep size as an integer, - // or equivalently, the number of fast timesteps per slow timestep. - integrator.set_slow_fast_timestep_ratio(2); - - // Attach the post update function to the integrator - integrator.set_post_update(post_update_fun); - - // integrate forward one step from `time` by `dt` to fill S_new - integrator.advance(Sborder, S_new, time, dt); + // integrate forward one step from `time` by `dt` to fill Snew + integrator.advance(Sold, Snew, time, dt); Picking A Time Integration Method ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -205,12 +55,6 @@ a generic explicit Runge-Kutta method. If Runge-Kutta is selected, then the user can choose which of a set of predefined Butcher Tables to use, or can choose to use a custom table and supply it manually. -When AMReX is compiled with SUNDIALS v.6 or later, the user also has an option -to use the SUNDIALS ARKODE integrator as a backend for the AMReX Time Integrator -class. The features of this interface evolve with the needs of our codes, so -they may not yet support all SUNDIALS configurations available. If you find you -need SUNDIALS options we have not implemented, please let us know. - The full set of integrator options are detailed as follows: :: @@ -222,7 +66,7 @@ The full set of integrator options are detailed as follows: ## (without the quotation marks) ## "ForwardEuler" or "0" = Native Forward Euler Integrator ## "RungeKutta" or "1" = Native Explicit Runge Kutta - ## "SUNDIALS" or "2" = SUNDIALS ARKODE Integrator + ## "SUNDIALS" or "2" = SUNDIALS Integrator ## for example: integration.type = RungeKutta @@ -246,35 +90,104 @@ The full set of integrator options are detailed as follows: integration.rk.nodes = 0 integration.rk.tableau = 0.0 - ## *** Parameters Needed For SUNDIALS ARKODE Integrator *** - ## integration.sundials.strategy specifies which ARKODE strategy to use. 
- ## The available options are (without the quotations): - ## "ERK" = Explicit Runge Kutta - ## "MRI" = Multirate Integrator - ## "MRITEST" = Tests the Multirate Integrator by setting a zero-valued fast RHS function - ## for example: - integration.sundials.strategy = ERK - - ## *** Parameters Specific to SUNDIALS ERK Strategy *** - ## (Requires integration.type=SUNDIALS and integration.sundials.strategy=ERK) - ## integration.sundials.erk.method specifies which explicit Runge Kutta method - ## for SUNDIALS to use. The following options are supported: - ## "SSPRK3" = 3rd order strong stability preserving RK (default) - ## "Trapezoid" = 2nd order trapezoidal rule - ## "ForwardEuler" = 1st order forward euler - ## for example: - integration.sundials.erk.method = SSPRK3 - - ## *** Parameters Specific to SUNDIALS MRI Strategy *** - ## (Requires integration.type=SUNDIALS and integration.sundials.strategy=MRI) - ## integration.sundials.mri.implicit_inner specifies whether or not to use an implicit inner solve - ## integration.sundials.mri.outer_method specifies which outer (slow) method to use - ## integration.sundials.mri.inner_method specifies which inner (fast) method to use - ## The following options are supported for both the inner and outer methods: - ## "KnothWolke3" = 3rd order Knoth-Wolke method (default for outer method) - ## "Trapezoid" = 2nd order trapezoidal rule - ## "ForwardEuler" = 1st order forward euler (default for inner method) - ## for example: - integration.sundials.mri.implicit_inner = false - integration.sundials.mri.outer_method = KnothWolke3 - integration.sundials.mri.inner_method = Trapezoid +.. _sec:time_int:sundials: + +Using SUNDIALS +^^^^^^^^^^^^^^ + +The AMReX Time Integration interface also supports a SUNDIALS backend that +provides explicit, implicit, and implicit-explicit (ImEx) Runge-Kutta methods +as well a multirate (MRI) methods from the ARKODE package in SUNDIALS. +To use SUNDIALS integrators, the user needs to compile AMReX with +``USE_SUNDIALS=TRUE`` and use SUNDIALS v6.0 or later. + +The SUNDIALS interface supports ``MultiFab`` or ``Vector`` data +types. Using a ``Vector`` permits integrating state data with +different spatial centering (e.g. cell centered, face centered, node centered) +concurrently. + +The same code as above can be used with SUNDIALS explicit or implicit +Runge-Kutta methods without any modification. 
To select a SUNDIALS explicit
+Runge-Kutta integrator, one needs only to add the following two input parameters
+at runtime:
+
+::
+
+    integration.type = SUNDIALS
+    integration.sundials.type = ERK
+
+One can select a different method type by changing the value of
+``integration.sundials.type`` to one of the following values:
+
++------------------------+--------------------------------------------------+
+| Input Option           | SUNDIALS Method Type                             |
++========================+==================================================+
+| ERK                    | Explicit Runge-Kutta method                      |
++------------------------+--------------------------------------------------+
+| DIRK                   | Diagonally Implicit Runge-Kutta method           |
++------------------------+--------------------------------------------------+
+| IMEX-RK                | Implicit-Explicit Additive Runge-Kutta method    |
++------------------------+--------------------------------------------------+
+| EX-MRI                 | Explicit Multirate Infinitesimal method          |
++------------------------+--------------------------------------------------+
+| IM-MRI                 | Implicit Multirate Infinitesimal method          |
++------------------------+--------------------------------------------------+
+| IMEX-MRI               | Implicit-Explicit Multirate Infinitesimal method |
++------------------------+--------------------------------------------------+
+
+For ImEx methods, the user needs to supply two right-hand side functions, an
+implicit and an explicit function, using the function
+``TimeIntegrator::set_imex_rhs()``. Similarly for multirate methods, the user
+needs to supply slow and fast right-hand side functions using
+``TimeIntegrator::set_rhs()`` to set the slow function and
+``TimeIntegrator::set_fast_rhs()`` to set the fast function. With multirate
+methods, one also needs to select the fast time scale method type using the
+input option ``integration.sundials.fast_type`` which may be set to ``ERK`` or
+``DIRK``.
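+
+The sketch below illustrates how these functions might be attached. It reuses
+the ``integrator``, ``Sold``, ``Snew``, ``time``, and ``dt`` variables from the
+earlier example; the argument order assumed for ``set_imex_rhs()`` (implicit
+function first, then explicit) is illustrative rather than authoritative, so
+consult the ``TimeIntegrator`` header for the exact interface.
+
+.. highlight:: c++
+
+::
+
+    // Implicit (stiff) part of the right-hand side
+    auto rhs_implicit = [&](MultiFab& rhs, MultiFab& state, const Real time){
+        // [Calculate the stiff part of the rhs given the state MultiFab]
+    };
+
+    // Explicit (non-stiff) part of the right-hand side
+    auto rhs_explicit = [&](MultiFab& rhs, MultiFab& state, const Real time){
+        // [Calculate the non-stiff part of the rhs given the state MultiFab]
+    };
+
+    // For an ImEx method (integration.sundials.type = IMEX-RK)
+    integrator.set_imex_rhs(rhs_implicit, rhs_explicit);
+
+    // For a multirate method (EX-MRI, IM-MRI, or IMEX-MRI), one would
+    // instead attach slow and fast right-hand side functions:
+    //   integrator.set_rhs(rhs_slow);
+    //   integrator.set_fast_rhs(rhs_fast);
+
+    // Integrate forward one step from `time` by `dt` to fill Snew
+    integrator.advance(Sold, Snew, time, dt);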
+
+To select a specific SUNDIALS method, use the input option
+``integration.sundials.method`` for ERK and DIRK methods as well as for the
+slow time scale method with an MRI integrator. Use
+``integration.sundials.method_i`` and ``integration.sundials.method_e`` to set
+the implicit and explicit methods in an ImEx method, and
+``integration.sundials.fast_method`` to set the ERK or DIRK method used at the
+fast time scale with an MRI integrator. These options may be set to any valid
+SUNDIALS method name; see the following sections in the SUNDIALS
+documentation for more details:
+
+* `ERK methods `_
+* `DIRK methods `_
+* `ImEx methods `_
+* `MRI methods `_
+
+The full set of integrator options is detailed as follows:
+
+::
+
+    # INTEGRATION WITH SUNDIALS
+
+    # *** Select the SUNDIALS integrator backend ***
+    integration.type = SUNDIALS
+
+    # *** Select the SUNDIALS method type ***
+    # ERK      = Explicit Runge-Kutta method
+    # DIRK     = Diagonally Implicit Runge-Kutta method
+    # IMEX-RK  = Implicit-Explicit Additive Runge-Kutta method
+    # EX-MRI   = Explicit Multirate Infinitesimal method
+    # IM-MRI   = Implicit Multirate Infinitesimal method
+    # IMEX-MRI = Implicit-Explicit Multirate Infinitesimal method
+    integration.sundials.type = ERK
+
+    # *** Select a specific SUNDIALS ERK method ***
+    integration.sundials.method = ARKODE_BOGACKI_SHAMPINE_4_2_3
+
+    # *** Select a specific SUNDIALS ImEx method ***
+    integration.sundials.method_i = ARKODE_ARK2_DIRK_3_1_2
+    integration.sundials.method_e = ARKODE_ARK2_ERK_3_1_2
+
+    # *** Select a specific SUNDIALS MRI method ***
+    integration.sundials.method = ARKODE_MIS_KW3
+    integration.sundials.fast_method = ARKODE_KNOTH_WOLKE_3_3
+
+The features of this interface evolve with the needs of our codes, so they may
+not yet support all SUNDIALS configurations available. If you find you need
+SUNDIALS options we have not implemented, please let us know.
diff --git a/Docs/sphinx_documentation/source/index.rst b/Docs/sphinx_documentation/source/index.rst
index d302af07468..203545cf40a 100644
--- a/Docs/sphinx_documentation/source/index.rst
+++ b/Docs/sphinx_documentation/source/index.rst
@@ -57,7 +57,7 @@ Documentation on migration from BoxLib is available in the AMReX repository at D
    Visualization_Chapter
    Post_Processing_Chapter
    Debugging
-   Inputs_Chapter
+   RuntimeParameters
    AMReX_Profiling_Tools_Chapter
    External_Profiling_Tools_Chapter
    External_Frameworks_Chapter
@@ -69,13 +69,6 @@ Documentation on migration from BoxLib is available in the AMReX repository at D
    :maxdepth: 1
    :caption: API
 
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
-
 The copyright notice of AMReX is included in the AMReX home
 directory as README.txt.
 Your use of this software is under the 3-clause BSD license -- the license agreement is included in the
diff --git a/README.md b/README.md
index 02ea15dcbfb..64aad625bfe 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ refinement applications.
[Gallery](#Gallery) - [Get Help](#get-help) - [Contribute](#Contribute) - +[Copyright Notice](#copyright-notice) - [License](#License) - [Citation](#Citation) diff --git a/Src/Amr/AMReX_Amr.H b/Src/Amr/AMReX_Amr.H index 3d9df9484ec..627285f9d9a 100644 --- a/Src/Amr/AMReX_Amr.H +++ b/Src/Amr/AMReX_Amr.H @@ -181,7 +181,7 @@ public: static void fillDerivePlotVarList (); static void fillDeriveSmallPlotVarList (); - static void setComputeNewDtOnRegrid (int flag) { compute_new_dt_on_regrid = flag; } + static void setComputeNewDtOnRegrid (bool flag) { compute_new_dt_on_regrid = flag; } static void Initialize (); static void Finalize (); @@ -430,7 +430,7 @@ protected: int small_plot_int; //!< How often small plotfile (# of time steps) Real small_plot_per; //!< How often small plotfile (in units of time) Real small_plot_log_per; //!< How often small plotfile (in units of log10(time)) - int write_plotfile_with_checkpoint; //!< Write out a plotfile whenever we checkpoint + bool write_plotfile_with_checkpoint; //!< Write out a plotfile whenever we checkpoint int file_name_digits; //!< How many digits to use in the plotfile and checkpoint names int message_int; //!< How often checking messages touched by user, such as "stop_run" std::string plot_file_root; //!< Root name of plotfile. @@ -473,7 +473,7 @@ protected: static Vector initial_ba; //! Array of BoxArrays read in to externally define grid hierarchy at each regrid static Vector regrid_ba; - static int compute_new_dt_on_regrid; + static bool compute_new_dt_on_regrid; #if defined(AMREX_USE_SENSEI_INSITU) && !defined(AMREX_NO_SENSEI_AMR_INST) static AmrInSituBridge *insitu_bridge; diff --git a/Src/Amr/AMReX_Amr.cpp b/Src/Amr/AMReX_Amr.cpp index 8df6f5cd423..2f8c110359b 100644 --- a/Src/Amr/AMReX_Amr.cpp +++ b/Src/Amr/AMReX_Amr.cpp @@ -59,7 +59,7 @@ bool Amr::first_plotfile; bool Amr::first_smallplotfile; Vector Amr::initial_ba; Vector Amr::regrid_ba; -int Amr::compute_new_dt_on_regrid; +bool Amr::compute_new_dt_on_regrid; #if defined(AMREX_USE_SENSEI_INSITU) && !defined(AMREX_NO_SENSEI_AMR_INST) AmrInSituBridge* Amr::insitu_bridge; #endif @@ -80,12 +80,12 @@ namespace #endif bool plot_files_output; int checkpoint_nfiles; - int regrid_on_restart; - int force_regrid_level_zero; - int use_efficient_regrid; - int plotfile_on_restart; - int insitu_on_restart; - int checkpoint_on_restart; + bool regrid_on_restart; + bool force_regrid_level_zero; + bool use_efficient_regrid; + bool plotfile_on_restart; + bool insitu_on_restart; + bool checkpoint_on_restart; bool checkpoint_files_output; bool precreateDirectories; bool prereadFAHeaders; @@ -117,14 +117,14 @@ Amr::Initialize () #endif plot_files_output = true; checkpoint_nfiles = 64; - regrid_on_restart = 0; - force_regrid_level_zero = 0; - use_efficient_regrid = 0; - plotfile_on_restart = 0; - insitu_on_restart = 0; - checkpoint_on_restart = 0; + regrid_on_restart = false; + force_regrid_level_zero = false; + use_efficient_regrid = false; + plotfile_on_restart = false; + insitu_on_restart = false; + checkpoint_on_restart = false; checkpoint_files_output = true; - compute_new_dt_on_regrid = 0; + compute_new_dt_on_regrid = false; precreateDirectories = true; prereadFAHeaders = true; plot_headerversion = VisMF::Header::Version_v1; @@ -257,17 +257,17 @@ Amr::InitAmr () // pp.queryAdd("regrid_on_restart",regrid_on_restart); pp.queryAdd("force_regrid_level_zero",force_regrid_level_zero); - pp.queryAdd("use_efficient_regrid",use_efficient_regrid); + pp.query("use_efficient_regrid",use_efficient_regrid); 
pp.queryAdd("plotfile_on_restart",plotfile_on_restart); - pp.queryAdd("insitu_on_restart",insitu_on_restart); + pp.query("insitu_on_restart",insitu_on_restart); pp.queryAdd("checkpoint_on_restart",checkpoint_on_restart); pp.queryAdd("compute_new_dt_on_regrid",compute_new_dt_on_regrid); - pp.queryAdd("mffile_nstreams", mffile_nstreams); + pp.query("mffile_nstreams", mffile_nstreams); #ifndef AMREX_NO_PROBINIT - pp.queryAdd("probinit_natonce", probinit_natonce); + pp.query("probinit_natonce", probinit_natonce); probinit_natonce = std::max(1, std::min(ParallelDescriptor::NProcs(), probinit_natonce)); #endif @@ -502,13 +502,13 @@ Amr::InitAmr () } loadbalance_with_workestimates = 0; - pp.queryAdd("loadbalance_with_workestimates", loadbalance_with_workestimates); + pp.query("loadbalance_with_workestimates", loadbalance_with_workestimates); loadbalance_level0_int = 2; - pp.queryAdd("loadbalance_level0_int", loadbalance_level0_int); + pp.query("loadbalance_level0_int", loadbalance_level0_int); loadbalance_max_fac = 1.5; - pp.queryAdd("loadbalance_max_fac", loadbalance_max_fac); + pp.query("loadbalance_max_fac", loadbalance_max_fac); } int @@ -2008,7 +2008,7 @@ Amr::timeStep (int level, // if (plotfile_on_restart && ! (restart_chkfile.empty()) ) { - plotfile_on_restart = 0; + plotfile_on_restart = false; writePlotFile(); } // @@ -2610,7 +2610,7 @@ Amr::regrid (int lbase, // // If use_efficient_regrid flag is set and grids are unchanged, then don't do anything more here. // - if (use_efficient_regrid == 1 && grids_unchanged ) + if (use_efficient_regrid == true && grids_unchanged ) { if (verbose > 0) { amrex::Print() << "Regridding at level lbase = " << lbase @@ -2673,7 +2673,7 @@ Amr::regrid (int lbase, { // // Init with data from old structure then remove old structure. - // NOTE: The init function may use a filPatch from the old level, + // NOTE: The init function may use a fillPatch from the old level, // which therefore needs remain in the hierarchy during the call. // a->init(*amr_level[lev]); @@ -2820,7 +2820,7 @@ Amr::InstallNewDistributionMap (int lev, const DistributionMapping& newdm) void Amr::regrid_level_0_on_restart() { - regrid_on_restart = 0; + regrid_on_restart = false; // // Coarsening before we split the grids ensures that each resulting // grid will have an even number of cells in each direction. 
@@ -3121,6 +3121,7 @@ Amr::initSubcycle () BL_PROFILE("Amr::initSubcycle()"); ParmParse pp("amr"); sub_cycle = true; + subcycling_mode = "Auto"; if (pp.contains("nosub")) { if (verbose) { @@ -3136,11 +3137,8 @@ Amr::initSubcycle () } subcycling_mode = "None"; } - else - { - subcycling_mode = "Auto"; - pp.queryAdd("subcycling_mode",subcycling_mode); - } + + pp.queryAdd("subcycling_mode",subcycling_mode); if (subcycling_mode == "None") { @@ -3292,25 +3290,25 @@ Amr::initPltAndChk () } } - write_plotfile_with_checkpoint = 1; + write_plotfile_with_checkpoint = true; pp.queryAdd("write_plotfile_with_checkpoint",write_plotfile_with_checkpoint); stream_max_tries = 4; - pp.queryAdd("stream_max_tries",stream_max_tries); + pp.query("stream_max_tries",stream_max_tries); stream_max_tries = std::max(stream_max_tries, 1); abort_on_stream_retry_failure = false; - pp.queryAdd("abort_on_stream_retry_failure",abort_on_stream_retry_failure); + pp.query("abort_on_stream_retry_failure",abort_on_stream_retry_failure); - pp.queryAdd("precreateDirectories", precreateDirectories); - pp.queryAdd("prereadFAHeaders", prereadFAHeaders); + pp.query("precreateDirectories", precreateDirectories); + pp.query("prereadFAHeaders", prereadFAHeaders); int phvInt(plot_headerversion), chvInt(checkpoint_headerversion); - pp.queryAdd("plot_headerversion", phvInt); + pp.query("plot_headerversion", phvInt); if(phvInt != plot_headerversion) { plot_headerversion = static_cast (phvInt); } - pp.queryAdd("checkpoint_headerversion", chvInt); + pp.query("checkpoint_headerversion", chvInt); if(chvInt != checkpoint_headerversion) { checkpoint_headerversion = static_cast (chvInt); } diff --git a/Src/AmrCore/AMReX_AmrMesh.H b/Src/AmrCore/AMReX_AmrMesh.H index f5d49f5c5da..c915f6011d4 100644 --- a/Src/AmrCore/AMReX_AmrMesh.H +++ b/Src/AmrCore/AMReX_AmrMesh.H @@ -26,7 +26,11 @@ struct AmrInfo { //! Blocking factor in grid generation (by level). Vector blocking_factor {{IntVect(8)}}; //! Maximum allowable grid size (by level). +#if defined(AMREX_USE_GPU) + Vector max_grid_size {{IntVect(AMREX_D_PICK(128,128,64))}}; +#else Vector max_grid_size {{IntVect(AMREX_D_PICK(128,128,32))}}; +#endif //! Buffer cells around each tagged cell. Vector n_error_buf {{IntVect(1)}}; //! Grid efficiency. @@ -178,7 +182,7 @@ public: void SetGeometry (int lev, const Geometry& geom_in) noexcept; //! Given domain box, return AMR level. Return -1 if there is no match. - int GetLevel (Box const& domain) noexcept; + [[nodiscard]] int GetLevel (Box const& domain) const noexcept; void ClearDistributionMap (int lev) noexcept; void ClearBoxArray (int lev) noexcept; @@ -201,7 +205,7 @@ public: //! Return the largest allowable grid. [[nodiscard]] const IntVect& maxGridSize (int lev) const noexcept { return max_grid_size[lev]; } - [[nodiscard]] bool LevelDefined (int lev) noexcept; + [[nodiscard]] bool LevelDefined (int lev) const noexcept; //! Should we keep the coarser grids fixed (and not regrid those levels) at all? 
[[nodiscard]] bool useFixedCoarseGrids () const noexcept { return use_fixed_coarse_grids; } @@ -255,7 +259,7 @@ public: [[nodiscard]] virtual BoxArray GetAreaNotToTag (int /*lev*/) { return BoxArray(); } - [[nodiscard]] long CountCells (int lev) noexcept; + [[nodiscard]] Long CountCells (int lev) const noexcept; [[nodiscard]] virtual DistributionMapping MakeDistributionMap (int lev, BoxArray const& ba); diff --git a/Src/AmrCore/AMReX_AmrMesh.cpp b/Src/AmrCore/AMReX_AmrMesh.cpp index 0ed59002f2e..388ba55dcf2 100644 --- a/Src/AmrCore/AMReX_AmrMesh.cpp +++ b/Src/AmrCore/AMReX_AmrMesh.cpp @@ -78,7 +78,9 @@ AmrMesh::InitAmrMesh (int max_level_in, const Vector& n_cell_in, { ParmParse pp("amr"); - pp.queryAdd("v",verbose); + if (! pp.query("verbose", "v", verbose)) { + pp.add("verbose", verbose); + } if (max_level_in == -1) { pp.get("max_level", max_level); @@ -89,28 +91,17 @@ AmrMesh::InitAmrMesh (int max_level_in, const Vector& n_cell_in, int nlev = max_level + 1; - blocking_factor.resize(nlev); - max_grid_size.resize(nlev); - n_error_buf.resize(nlev); + AmrInfo def_amr_info; + + blocking_factor.resize(nlev, def_amr_info.blocking_factor.back()); + max_grid_size.resize (nlev, def_amr_info.max_grid_size.back()); + n_error_buf.resize (nlev, def_amr_info.n_error_buf.back()); + ref_ratio.resize (nlev, def_amr_info.ref_ratio.back()); geom.resize(nlev); dmap.resize(nlev); grids.resize(nlev); - for (int i = 0; i < nlev; ++i) { - n_error_buf[i] = IntVect{AMREX_D_DECL(1,1,1)}; - blocking_factor[i] = IntVect{AMREX_D_DECL(8,8,8)}; - max_grid_size[i] = (AMREX_SPACEDIM == 2) ? IntVect{AMREX_D_DECL(128,128,128)} - : IntVect{AMREX_D_DECL(32,32,32)}; - } - - // Make the default ref_ratio = 2 for all levels. - ref_ratio.resize(max_level); - for (int i = 0; i < max_level; ++i) - { - ref_ratio[i] = 2 * IntVect::TheUnitVector(); - } - pp.queryAdd("n_proper",n_proper); pp.queryAdd("grid_eff",grid_eff); int cnt = pp.countval("n_error_buf"); @@ -371,11 +362,15 @@ AmrMesh::InitAmrMesh (int max_level_in, const Vector& n_cell_in, { pp.queryAdd("refine_grid_layout", refine_grid_layout); - refine_grid_layout_dims = IntVect(refine_grid_layout); - AMREX_D_TERM(pp.queryAdd("refine_grid_layout_x", refine_grid_layout_dims[0]);, - pp.queryAdd("refine_grid_layout_y", refine_grid_layout_dims[1]);, - pp.queryAdd("refine_grid_layout_z", refine_grid_layout_dims[2])); - + std::array tmp{AMREX_D_DECL(refine_grid_layout, + refine_grid_layout, + refine_grid_layout)}; + AMREX_D_TERM(pp.query("refine_grid_layout_x", tmp[0]);, + pp.query("refine_grid_layout_y", tmp[1]);, + pp.query("refine_grid_layout_z", tmp[2])); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + refine_grid_layout_dims[idim] = tmp[idim]; + } refine_grid_layout = refine_grid_layout_dims != 0; } @@ -421,9 +416,9 @@ AmrMesh::SetGeometry (int lev, const Geometry& geom_in) noexcept } int -AmrMesh::GetLevel (Box const& domain) noexcept +AmrMesh::GetLevel (Box const& domain) const noexcept { - Box ccdomain = amrex::enclosedCells(domain); + Box const& ccdomain = amrex::enclosedCells(domain); for (int lev = 0; lev < geom.size(); ++lev) { if (geom[lev].Domain() == ccdomain) { return lev; } } @@ -443,7 +438,7 @@ AmrMesh::ClearBoxArray (int lev) noexcept } bool -AmrMesh::LevelDefined (int lev) noexcept +AmrMesh::LevelDefined (int lev) const noexcept { return lev <= max_level && !grids[lev].empty() && !dmap[lev].empty(); } @@ -476,6 +471,9 @@ AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const IntVect chunk = max_grid_size[lev]; 
chunk.min(Geom(lev).Domain().length()); + // Note that ba already satisfies the max_grid_size requirement and it's + // coarsenable if it's a fine level BoxArray. + while (ba.size() < target_size) { IntVect chunk_prev = chunk; @@ -490,11 +488,24 @@ AmrMesh::ChopGrids (int lev, BoxArray& ba, int target_size) const int idim = chunk_dir[idx].second; if (refine_grid_layout_dims[idim]) { int new_chunk_size = chunk[idim] / 2; + int rr = (lev > 0) ? ref_ratio[lev-1][idim] : 1; + if (rr > 1) { + new_chunk_size = (new_chunk_size/rr) * rr; + } if (new_chunk_size != 0 && new_chunk_size%blocking_factor[lev][idim] == 0) { chunk[idim] = new_chunk_size; - ba.maxSize(chunk); + if (rr == 1) { + ba.maxSize(chunk); + } else { + IntVect bf(1); + bf[idim] = rr; + // Note that only idim-direction will be chopped by + // minmaxSize because the sizes in other directions + // are already smaller than chunk. + ba.minmaxSize(bf, chunk); + } break; } } @@ -1216,8 +1227,8 @@ AmrMesh::checkInput () } } -long -AmrMesh::CountCells (int lev) noexcept +Long +AmrMesh::CountCells (int lev) const noexcept { return grids[lev].numPts(); } diff --git a/Src/AmrCore/AMReX_ErrorList.cpp b/Src/AmrCore/AMReX_ErrorList.cpp index c9d1a7feef8..7e72cd956a7 100644 --- a/Src/AmrCore/AMReX_ErrorList.cpp +++ b/Src/AmrCore/AMReX_ErrorList.cpp @@ -258,9 +258,9 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto const& tagma = tba.arrays(); if (m_test == BOX) { - const auto plo = geom.ProbLoArray(); - const auto dx = geom.CellSizeArray(); - const auto tag_rb = m_info.m_realbox; + const auto& plo = geom.ProbLoArray(); + const auto& dx = geom.CellSizeArray(); + const auto& tag_rb = m_info.m_realbox; ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { GpuArray pt @@ -277,7 +277,7 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto const& datma = mf->const_arrays(); auto threshold = m_value[level]; auto const volume_weighting = m_info.m_volume_weighting; - auto geomdata = geom.data(); + auto const& geomdata = geom.data(); auto tag_update = tagval; if (m_info.m_derefine) { tag_update = clearval; diff --git a/Src/AmrCore/AMReX_FillPatchUtil_I.H b/Src/AmrCore/AMReX_FillPatchUtil_I.H index 02b5523cab5..15aaf3004dd 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil_I.H +++ b/Src/AmrCore/AMReX_FillPatchUtil_I.H @@ -519,7 +519,7 @@ namespace detail { const FabArrayBase::CPC mask_cpc( mf_solution, IntVect::TheZeroVector(), mf_known, IntVect::TheZeroVector(), - fgeom.periodicity()); + cgeom.periodicity()); solve_mask.setVal(1); // Values to solve. solve_mask.setVal(0, mask_cpc, 0, 1); // Known values. @@ -681,7 +681,7 @@ namespace detail { const FabArrayBase::CPC mask_cpc( mf_solution, IntVect::TheZeroVector(), mf_known, IntVect::TheZeroVector(), - fgeom.periodicity() ); + cgeom.periodicity() ); solve_mask[d].setVal(1); // Values to solve. solve_mask[d].setVal(0, mask_cpc, 0, 1); // Known values. 
diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index b29ba4aca40..382962d11b2 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -294,13 +294,13 @@ facediv_int (int ci, int cj, int ck, int nf, + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010) + dy3/(8*dz*xspys)*(w100+w112-w102-w110); - fine[1](fi+1, fj+1, fk , nf) = Real(0.5)*(v001+v021) + fine[1](fi+1, fj+1, fk , nf) = Real(0.5)*(v100+v120) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200) + dy3/(8*dx*yspzs)*(u001+u211-u011-u201) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110) + dy3/(8*dz*xspys)*(w000+w012-w002-w010); - fine[1](fi , fj+1, fk+1, nf) = Real(0.5)*(v100+v120) + fine[1](fi , fj+1, fk+1, nf) = Real(0.5)*(v001+v021) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201) + dy3/(8*dx*yspzs)*(u000+u210-u010-u200) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010) @@ -319,17 +319,17 @@ facediv_int (int ci, int cj, int ck, int nf, + dz3/(8*dy*zspxs)*(v100+v121-v101-v120); fine[2](fi , fj+1, fk+1, nf) = Real(0.5)*(w010+w012) - + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200) - + dz3/(8*dx*yspzs)*(u010+u211-u011-u210) - + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120) - + dz3/(8*dy*zspxs)*(v000+v021-v001-v020); - - fine[2](fi+1, fj , fk+1, nf) = Real(0.5)*(w100+w102) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210) + dz3/(8*dx*yspzs)*(u000+u201-u001-u200) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020) + dz3/(8*dy*zspxs)*(v100+v121-v101-v120); + fine[2](fi+1, fj , fk+1, nf) = Real(0.5)*(w100+w102) + + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200) + + dz3/(8*dx*yspzs)*(u010+u211-u011-u210) + + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120) + + dz3/(8*dy*zspxs)*(v000+v021-v001-v020); + fine[2](fi+1, fj+1, fk+1, nf) = Real(0.5)*(w110+w112) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210) + dz3/(8*dx*yspzs)*(u000+u201-u001-u200) diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index c13fb283a35..6333112aae8 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -565,6 +565,9 @@ public: * of the divergence of the underlying crse cell. All fine cells overlying * a given coarse cell will have the same divergence, even when the coarse * grid divergence is spatially varying. +* Based on Vanella et. al. (doi:10.1016/j.jcp.2010.05.003, section 3.2), +* but solves the interior closure problem using least squares with an +* initial guess equal to the average of fine face values across the cell. 
*/ class FaceDivFree : diff --git a/Src/Base/AMReX.H b/Src/Base/AMReX.H index 2b88553bcdf..4094d4f2ff2 100644 --- a/Src/Base/AMReX.H +++ b/Src/Base/AMReX.H @@ -10,10 +10,12 @@ #include #include +#include #include #include #include #include +#include #include // @@ -47,23 +49,25 @@ namespace amrex extern AMREX_EXPORT int verbose; - extern AMREX_EXPORT int signal_handling; - extern AMREX_EXPORT int handle_sigsegv; - extern AMREX_EXPORT int handle_sigterm; - extern AMREX_EXPORT int handle_sigint; - extern AMREX_EXPORT int handle_sigabrt; - extern AMREX_EXPORT int handle_sigfpe; + extern AMREX_EXPORT bool signal_handling; + extern AMREX_EXPORT bool handle_sigsegv; + extern AMREX_EXPORT bool handle_sigterm; + extern AMREX_EXPORT bool handle_sigint; + extern AMREX_EXPORT bool handle_sigabrt; + extern AMREX_EXPORT bool handle_sigfpe; - extern AMREX_EXPORT int call_addr2line; - extern AMREX_EXPORT int throw_exception; + extern AMREX_EXPORT bool call_addr2line; + extern AMREX_EXPORT bool throw_exception; - extern AMREX_EXPORT int regtest_reduction; + extern AMREX_EXPORT bool regtest_reduction; extern AMREX_EXPORT std::ostream* osout; extern AMREX_EXPORT std::ostream* oserr; extern AMREX_EXPORT ErrorHandler error_handler; - extern AMREX_EXPORT int abort_on_unused_inputs; + extern AMREX_EXPORT bool abort_on_unused_inputs; + + extern AMREX_EXPORT bool init_snan; } /** the AMReX "git describe" version */ @@ -197,6 +201,9 @@ namespace amrex [[nodiscard]] int Verbose () noexcept; void SetVerbose (int v) noexcept; + [[nodiscard]] bool InitSNaN () noexcept; + void SetInitSNaN (bool v) noexcept; + // ! Get the entire command line including the executable [[nodiscard]] std::string get_command (); @@ -277,6 +284,65 @@ namespace amrex Geometry* m_geom = nullptr; }; + + enum struct FPExcept : std::uint8_t { + none = 0B0000, + invalid = 0B0001, + zero = 0B0010, + overflow = 0B0100, + all = 0B0111 + }; + + [[nodiscard]] inline bool any (FPExcept a) { return a != FPExcept::none; } + + [[nodiscard]] inline FPExcept operator| (FPExcept a, FPExcept b) + { + using T = std::underlying_type_t; + return static_cast(static_cast(a) | static_cast(b)); + } + + [[nodiscard]] inline FPExcept operator& (FPExcept a, FPExcept b) + { + using T = std::underlying_type_t; + return static_cast(static_cast(a) & static_cast(b)); + } + + //! Return currently enabled FP exceptions. Linux only. + [[nodiscard]] FPExcept getFPExcept (); + + //! Set FP exception traps. Linux only. This enables set flags and + //! DISABLES unset flags. This can be used to restore previous settings. + FPExcept setFPExcept (FPExcept excepts); + + /** + * \brief Disable FP exceptions. Linux Only + * + * This function disables given exception traps and keeps the status of + * the others. The example below disables FPE invalid and + * divide-by-zero, and later restores the previous settings. + * + \verbatim + auto prev_excepts = disableFPExcept(FPExcept::invalid | FPExcept::zero); + // .... + setFPExcept(prev_excepts); // restore previous settings + \endverbatim + */ + [[nodiscard]] FPExcept disableFPExcept (FPExcept excepts); + + /** + * \brief Enable FP exceptions. Linux Only + * + * This function enables given exception traps and keeps the status of + * the others. The example below enables all FPE traps, and later + * restores the previous settings. + * + \verbatim + auto prev_excepts = disableFPExcept(FPExcept::all); + // .... 
+ setFPExcept(prev_excepts); // restore previous settings + \endverbatim + */ + [[nodiscard]] FPExcept enableFPExcept (FPExcept excepts); } #endif /*BL_AMREX_H*/ diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp index 2d6e7626c80..b4aa8f44909 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -99,20 +99,25 @@ namespace system { std::string exename; int verbose = 1; - int signal_handling; - int handle_sigsegv; - int handle_sigterm; - int handle_sigint; - int handle_sigabrt; - int handle_sigfpe; - int handle_sigill; - int call_addr2line; - int throw_exception; - int regtest_reduction; - int abort_on_unused_inputs = 0; + bool signal_handling; + bool handle_sigsegv; + bool handle_sigterm; + bool handle_sigint; + bool handle_sigabrt; + bool handle_sigfpe; + bool handle_sigill; + bool call_addr2line; + bool throw_exception; + bool regtest_reduction; + bool abort_on_unused_inputs = false; std::ostream* osout = &std::cout; std::ostream* oserr = &std::cerr; ErrorHandler error_handler = nullptr; +#if defined(AMREX_DEBUG) || defined(AMREX_TESTING) + bool init_snan = true; +#else + bool init_snan = false; +#endif } } @@ -143,11 +148,11 @@ namespace { #ifdef AMREX_USE_HYPRE namespace { - int init_hypre = 1; + bool init_hypre = true; #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) - int hypre_spgemm_use_vendor = 0; - int hypre_spmv_use_vendor = 0; - int hypre_sptrans_use_vendor = 0; + bool hypre_spgemm_use_vendor = false; + bool hypre_spmv_use_vendor = false; + bool hypre_sptrans_use_vendor = false; #endif } #endif @@ -156,6 +161,10 @@ int amrex::Verbose () noexcept { return amrex::system::verbose; } void amrex::SetVerbose (int v) noexcept { amrex::system::verbose = v; } +bool amrex::InitSNaN () noexcept { return amrex::system::init_snan; } + +void amrex::SetInitSNaN (bool v) noexcept { amrex::system::init_snan = v; } + void amrex::SetErrorHandler (amrex::ErrorHandler f) { amrex::system::error_handler = f; } @@ -327,16 +336,16 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, { system::exename.clear(); // system::verbose = 0; - system::regtest_reduction = 0; - system::signal_handling = 1; - system::handle_sigsegv = 1; - system::handle_sigterm = 0; - system::handle_sigint = 1; - system::handle_sigabrt = 1; - system::handle_sigfpe = 1; - system::handle_sigill = 1; - system::call_addr2line = 1; - system::throw_exception = 0; + system::regtest_reduction = false; + system::signal_handling = true; + system::handle_sigsegv = true; + system::handle_sigterm = false; + system::handle_sigint = true; + system::handle_sigabrt = true; + system::handle_sigfpe = true; + system::handle_sigill = true; + system::call_addr2line = true; + system::throw_exception = false; system::osout = &a_osout; system::oserr = &a_oserr; system::error_handler = a_errhandler; @@ -442,8 +451,10 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, { ParmParse pp("amrex"); - pp.queryAdd("v", system::verbose); - pp.queryAdd("verbose", system::verbose); + if (! 
pp.query("verbose", "v", system::verbose)) { + pp.add("verbose", system::verbose); + } + pp.queryAdd("init_snan", system::init_snan); } if (system::verbose > 0) { @@ -501,10 +512,10 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, { ParmParse pp("amrex"); - pp.queryAdd("regtest_reduction", system::regtest_reduction); + pp.query("regtest_reduction", system::regtest_reduction); pp.queryAdd("signal_handling", system::signal_handling); pp.queryAdd("throw_exception", system::throw_exception); - pp.queryAdd("call_addr2line", system::call_addr2line); + pp.query("call_addr2line", system::call_addr2line); pp.queryAdd("abort_on_unused_inputs", system::abort_on_unused_inputs); #ifdef AMREX_USE_SYCL @@ -550,10 +561,21 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, prev_handler_sigabrt = SIG_ERR; // NOLINT(performance-no-int-to-ptr) } - prev_handler_sigfpe = SIG_ERR; // NOLINT(performance-no-int-to-ptr) + if (system::handle_sigfpe) { + prev_handler_sigfpe = std::signal(SIGFPE, BLBackTrace::handler); + } else { + prev_handler_sigfpe = SIG_ERR; // NOLINT(performance-no-int-to-ptr) + } + + if (system::handle_sigill) { + prev_handler_sigill = std::signal(SIGILL, BLBackTrace::handler); + } else { + prev_handler_sigill = SIG_ERR; // NOLINT(performance-no-int-to-ptr) + } + if (system::handle_sigfpe) { - int invalid = 0, divbyzero=0, overflow=0; + bool invalid = false, divbyzero=false, overflow=false; pp.queryAdd("fpe_trap_invalid", invalid); pp.queryAdd("fpe_trap_zero", divbyzero); pp.queryAdd("fpe_trap_overflow", overflow); @@ -566,7 +588,6 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, prev_fpe_excepts = fegetexcept(); if (curr_fpe_excepts != 0) { feenableexcept(curr_fpe_excepts); // trap floating point exceptions - prev_handler_sigfpe = std::signal(SIGFPE, BLBackTrace::handler); } #elif defined(__APPLE__) && defined(__x86_64__) @@ -577,16 +598,14 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, if (overflow) { curr_fpe_excepts |= _MM_MASK_OVERFLOW; } if (curr_fpe_excepts != 0u) { _MM_SET_EXCEPTION_MASK(prev_fpe_mask & ~curr_fpe_excepts); - prev_handler_sigfpe = std::signal(SIGFPE, BLBackTrace::handler); } #endif } - prev_handler_sigill = SIG_ERR; // NOLINT(performance-no-int-to-ptr) +#if defined(__APPLE__) && defined(__aarch64__) if (system::handle_sigill) { -#if defined(__APPLE__) && defined(__aarch64__) - int invalid = 0, divbyzero=0, overflow=0; + bool invalid = false, divbyzero=false, overflow=false; pp.queryAdd("fpe_trap_invalid", invalid); pp.queryAdd("fpe_trap_zero", divbyzero); pp.queryAdd("fpe_trap_overflow", overflow); @@ -598,9 +617,8 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, if (overflow) { env.__fpcr |= __fpcr_trap_overflow; } fesetenv(&env); // SIGILL ref: https://developer.apple.com/forums/thread/689159 -#endif - prev_handler_sigill = std::signal(SIGILL, BLBackTrace::handler); } +#endif } #ifdef AMREX_USE_HYPRE @@ -913,4 +931,63 @@ AMReX::erase (AMReX* pamrex) } } +FPExcept getFPExcept () +{ + auto r = FPExcept::none; +#if defined(__linux__) + auto excepts = fegetexcept(); + if (excepts & FE_INVALID ) { r = r | FPExcept::invalid ; } + if (excepts & FE_DIVBYZERO) { r = r | FPExcept::zero ; } + if (excepts & FE_OVERFLOW ) { r = r | FPExcept::overflow; } +#endif + return r; +} + +FPExcept setFPExcept (FPExcept excepts) +{ + auto prev = getFPExcept(); +#if defined(__linux__) + int flags = FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW; + fedisableexcept(flags); + flags = 0; + if 
(any(excepts & FPExcept::invalid )) { flags |= FE_INVALID ; } + if (any(excepts & FPExcept::zero )) { flags |= FE_DIVBYZERO; } + if (any(excepts & FPExcept::overflow)) { flags |= FE_OVERFLOW ; } + feenableexcept(flags); +#else + amrex::ignore_unused(excepts); +#endif + return prev; +} + +FPExcept disableFPExcept (FPExcept excepts) +{ + auto prev = getFPExcept(); +#if defined(__linux__) + int flags = 0; + if (any(excepts & FPExcept::invalid )) { flags |= FE_INVALID ; } + if (any(excepts & FPExcept::zero )) { flags |= FE_DIVBYZERO; } + if (any(excepts & FPExcept::overflow)) { flags |= FE_OVERFLOW ; } + fedisableexcept(flags); +#else + amrex::ignore_unused(excepts); +#endif + return prev; +} + +FPExcept enableFPExcept (FPExcept excepts) +{ + auto prev = getFPExcept(); +#if defined(__linux__) + int flags = 0; + if (any(excepts & FPExcept::invalid )) { flags |= FE_INVALID ; } + if (any(excepts & FPExcept::zero )) { flags |= FE_DIVBYZERO; } + if (any(excepts & FPExcept::overflow)) { flags |= FE_OVERFLOW ; } + feenableexcept(flags); +#else + amrex::ignore_unused(excepts); +#endif + return prev; +} + } diff --git a/Src/Base/AMReX_Array.H b/Src/Base/AMReX_Array.H index 525133cde87..15ddde4d1e9 100644 --- a/Src/Base/AMReX_Array.H +++ b/Src/Base/AMReX_Array.H @@ -127,6 +127,15 @@ namespace amrex { return p; } + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + GpuArray& operator+= (GpuArray const& a) noexcept + { + for (unsigned int i = 0; i < N; ++i) { + arr[i] += a.arr[i]; + } + return *this; + } + T arr[amrex::max(N,1U)]; }; } diff --git a/Src/Base/AMReX_AsyncOut.cpp b/Src/Base/AMReX_AsyncOut.cpp index b235baa6580..3835c0b137e 100644 --- a/Src/Base/AMReX_AsyncOut.cpp +++ b/Src/Base/AMReX_AsyncOut.cpp @@ -10,7 +10,7 @@ namespace amrex::AsyncOut { namespace { -int s_asyncout = false; +bool s_asyncout = false; int s_noutfiles = 64; MPI_Comm s_comm = MPI_COMM_NULL; diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp index d511a192728..d065bd71c05 100644 --- a/Src/Base/AMReX_BLBackTrace.cpp +++ b/Src/Base/AMReX_BLBackTrace.cpp @@ -13,12 +13,13 @@ #include #endif +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include #if !(defined(_MSC_VER) && defined(__CUDACC__)) //MSVC can't pre-processor cfenv with `Zc:preprocessor` @@ -177,6 +178,18 @@ namespace { } return r; } + +#ifdef __linux__ + bool command_exists(std::string const &cmd) + { + // command -v is part of POSIX so should be available + std::string check_command = "command -v " + cmd + " > /dev/null 2>&1"; + int r = std::system(check_command.c_str()); + // return value of std::system is implementation defined and can be + // decoded using WEXITSTATUS but it should be 0 on success + return r == 0; + } +#endif } #endif @@ -209,19 +222,32 @@ BLBackTrace::print_backtrace_info (FILE* f) int have_addr2line = 0; std::string eu_cmd; { - have_eu_addr2line = file_exists("/usr/bin/eu-addr2line"); + if (command_exists("eu-addr2line")) { + have_eu_addr2line = 1; + eu_cmd = "eu-addr2line"; + } else { + std::string eu_fallback_path = "/usr/bin/eu-addr2line"; + have_eu_addr2line = file_exists(eu_fallback_path.c_str()); + eu_cmd = std::move(eu_fallback_path); + } if (have_eu_addr2line) { const pid_t pid = getpid(); // cmd = "/usr/bin/eu-addr2line -C -f -i --pretty-print -p " - eu_cmd = "/usr/bin/eu-addr2line -C -f -i -p " - + std::to_string(pid); + eu_cmd += " -C -f -i -p " + std::to_string(pid); } } std::string cmd; { - have_addr2line = file_exists("/usr/bin/addr2line"); + if 
(command_exists("addr2line")) { + have_addr2line = 1; + cmd = "addr2line"; + } else { + std::string fallback_path = "/usr/bin/addr2line"; + have_addr2line = file_exists(fallback_path.c_str()); + cmd = std::move(fallback_path); + } if (have_addr2line) { - cmd = "/usr/bin/addr2line -Cpfie " + amrex::system::exename; + cmd += " -Cpfie " + amrex::system::exename; } } diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index c1212fb7a0a..db9289894ca 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -457,6 +457,15 @@ public: void getVal (T* data, const IntVect& pos, int N, int numcomp) const noexcept; //! Same as above, except that starts at component 0 and copies all comps. void getVal (T* data, const IntVect& pos) const noexcept; + +#if defined(AMREX_USE_GPU) + template || std::is_same_v,int> FOO = 0> + void fill_snan () noexcept; + /** * \brief The setVal functions set sub-regions in the BaseFab to a * constant value. This most general form specifies the sub-box, @@ -1829,6 +1838,15 @@ BaseFab::shiftHalf (int idir, int n_cell) noexcept return *this; } +template +template || std::is_same_v, int> FOO> +void +BaseFab::fill_snan () noexcept +{ + amrex::fill_snan(this->dptr, this->truesize); +} + template template void @@ -1924,6 +1942,20 @@ BaseFab::define () placementNew(this->dptr, this->truesize); amrex::update_fab_stats(this->domain.numPts(), this->truesize, sizeof(T)); + + if constexpr (std::is_same_v || std::is_same_v) { + if (amrex::InitSNaN() && this->truesize > 0) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && arena()->isDeviceAccessible()) { + this->template fill_snan(); + Gpu::streamSynchronize(); + } else +#endif + { + this->template fill_snan(); + } + } + } } template diff --git a/Src/Base/AMReX_BaseFwd.H b/Src/Base/AMReX_BaseFwd.H index 824a6c504e7..d187673869a 100644 --- a/Src/Base/AMReX_BaseFwd.H +++ b/Src/Base/AMReX_BaseFwd.H @@ -1,5 +1,6 @@ #ifndef AMREX_BASE_FWD_H_ #define AMREX_BASE_FWD_H_ +#include namespace amrex { @@ -21,11 +22,15 @@ class BoxList; class DistributionMapping; class Geometry; -class Box; +template +class BoxND; +using Box = BoxND; template class IntVectND; using IntVect = IntVectND; -class IndexType; +template +class IndexTypeND; +using IndexType = IndexTypeND; struct Dim3; struct XDim3; class RealBox; diff --git a/Src/Base/AMReX_Box.H b/Src/Base/AMReX_Box.H index f29428b1022..7d4cb30c43e 100644 --- a/Src/Base/AMReX_Box.H +++ b/Src/Base/AMReX_Box.H @@ -21,59 +21,61 @@ namespace amrex { +template +class BoxND; +using Box = BoxND; class BoxCommHelper; /** * \brief A Rectangular Domain on an Integer Lattice * -* A Box is an abstraction for defining discrete regions of -* SPACEDIM indexing space. Boxes have an IndexType, which defines +* A BoxND is an abstraction for defining discrete regions of +* dim indexing space. Boxes have an IndexType, which defines * IndexType::CELL or IndexType::NODE based points for each direction * and a low and high INTVECT which defines the lower and upper corners -* of the Box. Boxes can exist in positive and negative indexing space. +* of the BoxND. Boxes can exist in positive and negative indexing space. * -* Box is a dimension dependent class, so SPACEDIM must be +* Box is a dimension dependent alias to BoxND, so AMREX_SPACEDIM must be * defined as either 1, 2, or 3 when compiling. */ -class Box +template +class BoxND { friend MPI_Datatype ParallelDescriptor::Mpi_typemap::type(); friend class BoxCommHelper; public: /* - * \brief The default constructor. 
For safety, the constructed Box is + * \brief The default constructor. For safety, the constructed BoxND is * invalid and may be tested for validity with ok(). * DO NOT CHANGE THIS BEHAVIOR! */ AMREX_GPU_HOST_DEVICE - constexpr Box () noexcept + constexpr BoxND () noexcept : smallend(1), bigend(0) {} - //! Construct cell-centered type Box. + //! Construct cell-centered type BoxND. AMREX_GPU_HOST_DEVICE - constexpr Box (const IntVect& small, const IntVect& big) noexcept + constexpr BoxND (const IntVectND& small, const IntVectND& big) noexcept : smallend(small), bigend(big) {} - //! Construct box with specified lengths. + //! Construct BoxND with specified lengths. AMREX_GPU_HOST_DEVICE - Box (const IntVect& small, const int* vec_len) noexcept + BoxND (const IntVectND& small, const int* vec_len) noexcept : smallend(small), - bigend(AMREX_D_DECL(small[0]+vec_len[0]-1, - small[1]+vec_len[1]-1, - small[2]+vec_len[2]-1)) + bigend(small + IntVectND(vec_len) - 1) {} /** - * \brief Construct Box with given type. small and big are expected + * \brief Construct BoxND with given type. small and big are expected * to be consistent with given type. */ AMREX_GPU_HOST_DEVICE - Box (const IntVect& small, const IntVect& big, const IntVect& typ) noexcept + BoxND (const IntVectND& small, const IntVectND& big, const IntVectND& typ) noexcept : smallend(small), bigend(big), btype(typ) @@ -83,27 +85,27 @@ public: //! Construct dimension specific Boxes. AMREX_GPU_HOST_DEVICE - Box (const IntVect& small, const IntVect& big, IndexType t) noexcept + BoxND (const IntVectND& small, const IntVectND& big, IndexTypeND t) noexcept : smallend(small), bigend(big), btype(t) {} - template + template = 0> AMREX_GPU_HOST_DEVICE - explicit Box (Array4 const& a) noexcept - : smallend(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)), - bigend (AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)) + explicit BoxND (Array4 const& a) noexcept + : smallend(a.begin), + bigend(IntVectND(a.end) - 1) {} // dtor, copy-ctor, copy-op=, move-ctor, and move-op= are compiler generated. - //! Get the smallend of the box. + //! Get the smallend of the BoxND. [[nodiscard]] AMREX_GPU_HOST_DEVICE - const IntVect& smallEnd () const& noexcept { return smallend; } + const IntVectND& smallEnd () const& noexcept { return smallend; } - //! Get the smallend of the box. - [[nodiscard]] const IntVect& smallEnd () && = delete; + //! Get the smallend of the BoxND. + [[nodiscard]] const IntVectND& smallEnd () && = delete; //! Returns the coordinate of the low end in the given direction. [[nodiscard]] AMREX_GPU_HOST_DEVICE @@ -111,10 +113,10 @@ public: //! Get the bigend. [[nodiscard]] AMREX_GPU_HOST_DEVICE - const IntVect& bigEnd () const& noexcept { return bigend; } + const IntVectND& bigEnd () const& noexcept { return bigend; } //! Get the bigend. - [[nodiscard]] const IntVect& bigEnd () && = delete; + [[nodiscard]] const IntVectND& bigEnd () && = delete; //! Returns the coordinate of the high end in the given direction. [[nodiscard]] AMREX_GPU_HOST_DEVICE @@ -122,69 +124,53 @@ public: //! Returns the indexing type. [[nodiscard]] AMREX_GPU_HOST_DEVICE - IndexType ixType () const noexcept { return btype; } + IndexTypeND ixType () const noexcept { return btype; } //! Returns the indexing type. [[nodiscard]] AMREX_GPU_HOST_DEVICE - IntVect type () const noexcept { return btype.ixType(); } + IntVectND type () const noexcept { return btype.ixType(); } //! Returns the indexing type in the specified direction. 
[[nodiscard]] AMREX_GPU_HOST_DEVICE IndexType::CellIndex type (int dir) const noexcept { return btype.ixType(dir); } - //! Return the length of the Box. + //! Return the length of the BoxND. [[nodiscard]] AMREX_GPU_HOST_DEVICE - IntVect size () const noexcept + IntVectND size () const noexcept { - return IntVect(AMREX_D_DECL(bigend[0]-smallend[0] + 1, - bigend[1]-smallend[1] + 1, - bigend[2]-smallend[2] + 1)); + return bigend - smallend + 1; } - //! Return the length of the Box. + //! Return the length of the BoxND. [[nodiscard]] AMREX_GPU_HOST_DEVICE - IntVect length () const noexcept + IntVectND length () const noexcept { - return IntVect(AMREX_D_DECL(bigend[0]-smallend[0] + 1, - bigend[1]-smallend[1] + 1, - bigend[2]-smallend[2] + 1)); + return bigend - smallend + 1; } - //! Return the length of the Box in given direction. + //! Return the length of the BoxND in given direction. [[nodiscard]] AMREX_GPU_HOST_DEVICE int length (int dir) const noexcept { return bigend[dir] - smallend[dir] + 1; } + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE GpuArray length3d () const noexcept { -#if (AMREX_SPACEDIM == 1) - return {{bigend[0]-smallend[0]+1, 1, 1}}; -#elif (AMREX_SPACEDIM == 2) - return {{bigend[0]-smallend[0]+1, bigend[1]-smallend[1]+1, 1}}; -#elif (AMREX_SPACEDIM == 3) - return {{bigend[0]-smallend[0]+1, bigend[1]-smallend[1]+1, bigend[2]-smallend[2]+1}}; -#endif + Dim3 len3d = length().dim3(1); + return {{len3d.x, len3d.y, len3d.z}}; } + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE GpuArray loVect3d () const noexcept { -#if (AMREX_SPACEDIM == 1) - return {{smallend[0], 0, 0}}; -#elif (AMREX_SPACEDIM == 2) - return {{smallend[0], smallend[1], 0}}; -#elif (AMREX_SPACEDIM == 3) - return {{smallend[0], smallend[1], smallend[2]}}; -#endif + Dim3 lo3d = smallend.dim3(0); + return {{lo3d.x, lo3d.y, lo3d.z}}; } + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE GpuArray hiVect3d () const noexcept { -#if (AMREX_SPACEDIM == 1) - return {{bigend[0], 0, 0}}; -#elif (AMREX_SPACEDIM == 2) - return {{bigend[0], bigend[1], 0}}; -#elif (AMREX_SPACEDIM == 3) - return {{bigend[0], bigend[1], bigend[2]}}; -#endif + Dim3 hi3d = bigend.dim3(0); + return {{hi3d.x, hi3d.y, hi3d.z}}; } //! Returns a constant pointer the array of low end coordinates. Useful for calls to FORTRAN. @@ -205,77 +191,77 @@ public: return face.isLow() ? smallend[dir] : bigend[dir]; } - //! Checks if it is an empty box. + //! Checks if it is an empty BoxND. [[nodiscard]] AMREX_GPU_HOST_DEVICE bool isEmpty () const noexcept { return !ok(); } - //! Checks if it is a proper Box (including a valid type). + //! Checks if it is a proper BoxND (including a valid type). [[nodiscard]] AMREX_GPU_HOST_DEVICE bool ok () const noexcept { return bigend.allGE(smallend) && btype.ok(); } - //! Returns true if argument is contained within Box. + //! Returns true if argument is contained within BoxND. [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool contains (const IntVect& p) const noexcept { return p.allGE(smallend) && p.allLE(bigend); } + bool contains (const IntVectND& p) const noexcept { + return p.allGE(smallend) && p.allLE(bigend); + } - //! Returns true if argument is contained within Box. + //! Returns true if argument is contained within BoxND. 
+ template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE bool contains (const Dim3& p) const noexcept { - return AMREX_D_TERM(p.x >= smallend[0] && p.x <= bigend[0], - && p.y >= smallend[1] && p.y <= bigend[1], - && p.z >= smallend[2] && p.z <= bigend[2]); + IntVectND piv{p}; + return contains(piv); } - //! Returns true if argument is contained within Box. + //! Returns true if argument is contained within BoxND. + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE -#if (AMREX_SPACEDIM == 1) - bool contains (int i, int, int) const noexcept { -#elif (AMREX_SPACEDIM == 2) - bool contains (int i, int j, int) const noexcept { -#else bool contains (int i, int j, int k) const noexcept { -#endif - return AMREX_D_TERM(i >= smallend[0] && i <= bigend[0], - && j >= smallend[1] && j <= bigend[1], - && k >= smallend[2] && k <= bigend[2]); + Dim3 p3d{i, j, k}; + return contains(p3d); } - /** \brief Returns true if argument is contained within Box. + /** \brief Returns true if argument is contained within BoxND. * It is an error if the Boxes have different types. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool contains (const Box& b) const noexcept + bool contains (const BoxND& b) const noexcept { BL_ASSERT(sameType(b)); return b.smallend.allGE(smallend) && b.bigend.allLE(bigend); } - //! Returns true if argument is strictly contained within Box. + //! Returns true if argument is strictly contained within BoxND. [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool strictly_contains (const IntVect& p) const noexcept { return p.allGT(smallend) && p.allLT(bigend); } + bool strictly_contains (const IntVectND& p) const noexcept { + return p.allGT(smallend) && p.allLT(bigend); + } /** - * \brief Returns true if argument is strictly contained within Box. + * \brief Returns true if argument is strictly contained within BoxND. * It is an error if the Boxes have different types. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool strictly_contains (const Box& b) const noexcept + bool strictly_contains (const BoxND& b) const noexcept { BL_ASSERT(sameType(b)); return b.smallend.allGT(smallend) && b.bigend.allLT(bigend); } - //! Returns true if argument is strictly contained within Box. + //! Returns true if argument is strictly contained within BoxND. + template = 0> + [[nodiscard]] AMREX_GPU_HOST_DEVICE + bool strictly_contains (const Dim3& p) const noexcept { + IntVectND piv{p}; + return strictly_contains(piv); + } + + //! Returns true if argument is strictly contained within BoxND. + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE -#if (AMREX_SPACEDIM == 1) - bool strictly_contains (int i, int, int) const noexcept { -#elif (AMREX_SPACEDIM == 2) - bool strictly_contains (int i, int j, int) const noexcept { -#else bool strictly_contains (int i, int j, int k) const noexcept { -#endif - return AMREX_D_TERM(i > smallend[0] && i < bigend[0], - && j > smallend[1] && j < bigend[1], - && k > smallend[2] && k < bigend[2]); + Dim3 p3d{i, j, k}; + return strictly_contains(p3d); } /** @@ -283,34 +269,32 @@ public: * It is an error if the Boxes have different types. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool intersects (const Box& b) const noexcept { Box isect(*this); isect &= b; return isect.ok(); } + bool intersects (const BoxND& b) const noexcept { BoxND isect(*this); isect &= b; return isect.ok(); } /** * \brief Returns true is Boxes same size, ie translates of each other,. * It is an error if they have different types. 
*/ [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool sameSize (const Box& b) const noexcept { + bool sameSize (const BoxND& b) const noexcept { BL_ASSERT(sameType(b)); - return AMREX_D_TERM(length(0) == b.length(0), - && length(1) == b.length(1), - && length(2) == b.length(2)); + return length() == b.length(); } //! Returns true if Boxes have same type. [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool sameType (const Box &b) const noexcept { return btype == b.btype; } + bool sameType (const BoxND &b) const noexcept { return btype == b.btype; } //! Returns true if Boxes are identical (including type). [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool operator== (const Box& b) const noexcept { return smallend == b.smallend && bigend == b.bigend && b.btype == btype; } + bool operator== (const BoxND& b) const noexcept { return smallend == b.smallend && bigend == b.bigend && b.btype == btype; } //! Returns true if Boxes differ (including type). [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool operator!= (const Box& b) const noexcept { return !operator==(b); } + bool operator!= (const BoxND& b) const noexcept { return !operator==(b); } [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool operator< (const Box& rhs) const noexcept + bool operator< (const BoxND& rhs) const noexcept { return btype < rhs.btype || ((btype == rhs.btype) && @@ -318,67 +302,106 @@ public: ((smallend == rhs.smallend) && (bigend < rhs.bigend)) )); } [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool operator <= (const Box& rhs) const noexcept { + bool operator <= (const BoxND& rhs) const noexcept { return !(rhs < *this); } [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool operator> (const Box& rhs) const noexcept { + bool operator> (const BoxND& rhs) const noexcept { return rhs < *this; } [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool operator>= (const Box& rhs) const noexcept { + bool operator>= (const BoxND& rhs) const noexcept { return !(*this < rhs); } - //! Returns true if Box is cell-centered in all indexing directions. + //! Returns true if BoxND is cell-centered in all indexing directions. [[nodiscard]] AMREX_GPU_HOST_DEVICE bool cellCentered () const noexcept { return !btype.any(); } + //! Assert that there are no int/Long overflows when calling length or numPts. + void checkOverflow () const noexcept { + if (ok()) { + for (int i = 0; i < dim; ++i) { + auto lo = static_cast(smallend[i]); + auto hi = static_cast(bigend[i]); + Long len = hi - lo + 1; + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(len>=0 && len::max(), + "Overflow when computing length of box"); + } + auto num_pts = static_cast(length(0)); + for (int i = 1; i < dim; ++i) { + auto len = static_cast(length(i)); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(num_pts == 0 || len == 0 || + num_pts <= std::numeric_limits::max() / len, + "Overflow when computing numPts of box"); + num_pts *= len; + } + } + } + /** - * \brief Returns the number of points contained in the Box. + * \brief Returns the number of points contained in the BoxND. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE Long numPts () const noexcept { - return ok() ? AMREX_D_TERM( static_cast(length(0)), - *static_cast(length(1)), - *static_cast(length(2))) - : Long(0); +#if defined(AMREX_DEBUG) || defined(AMREX_USE_ASSERTION) + AMREX_IF_ON_HOST((checkOverflow();)) +#endif + if (ok()) { + auto num_pts = static_cast(length(0)); + for (int i = 1; i < dim; ++i) { + num_pts *= static_cast(length(i)); + } + return num_pts; + } else { + return Long(0); + } } /** - * \brief Returns the number of points contained in the Box. 
+ * \brief Returns the number of points contained in the BoxND. * This is intended for use only in diagnostic messages. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE double d_numPts () const noexcept { - return ok() ? AMREX_D_TERM( double(length(0)), - *double(length(1)), - *double(length(2))) - : 0.0; + if (ok()) { + auto num_pts = static_cast(length(0)); + for (int i = 1; i < dim; ++i) { + num_pts *= static_cast(length(i)); + } + return num_pts; + } else { + return 0.0; + } } /** * \brief Return the volume, in indexing space, of region enclosed by - * this Box. This is identical to numPts() for CELL centered - * Box; otherwise, numPts() > volume(). + * this BoxND. This is identical to numPts() for CELL centered + * BoxND; otherwise, numPts() > volume(). */ [[nodiscard]] AMREX_GPU_HOST_DEVICE Long volume () const noexcept { - return ok() ? AMREX_D_TERM( static_cast(length(0)-btype[0]), - *static_cast(length(1)-btype[1]), - *static_cast(length(2)-btype[2])) - : Long(0); + if (ok()) { + auto num_pts = static_cast(length(0)-btype[0]); + for (int i = 1; i < dim; ++i) { + num_pts *= static_cast(length(i)-btype[i]); + } + return num_pts; + } else { + return Long(0); + } } /** * \brief Returns length of longest side. dir is modified to give - * direction with longest side: 0...SPACEDIM-1. Ignores type. + * direction with longest side: 0...dim-1. Ignores type. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE int longside (int& dir) const noexcept { int maxlen = length(0); dir = 0; - for (int i = 1; i < AMREX_SPACEDIM; i++) + for (int i = 1; i < dim; i++) { if (length(i) > maxlen) { @@ -398,13 +421,13 @@ public: /** * \brief Returns length of shortest side. dir is modified to give - * direction with shortest side: 0...SPACEDIM-1. Ignores type. + * direction with shortest side: 0...dim-1. Ignores type. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE int shortside (int& dir) const noexcept { int minlen = length(0); dir = 0; - for (int i = 1; i < AMREX_SPACEDIM; i++) + for (int i = 1; i < dim; i++) { if (length(i) < minlen) { @@ -428,56 +451,57 @@ public: * Is used in accessing FArrayBox. */ [[nodiscard]] AMREX_GPU_HOST_DEVICE - Long index (const IntVect& v) const noexcept; + Long index (const IntVectND& v) const noexcept; - //! Given the offset, compute IntVect + //! Given the offset, compute IntVectND [[nodiscard]] AMREX_GPU_HOST_DEVICE - IntVect atOffset (Long offset) const noexcept; + IntVectND atOffset (Long offset) const noexcept; + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE GpuArray atOffset3d (Long offset) const noexcept; - //! Redefine the small end of the Box. + //! Redefine the small end of the BoxND. AMREX_GPU_HOST_DEVICE - Box& setSmall (const IntVect& sm) noexcept { smallend = sm; return *this; } + BoxND& setSmall (const IntVectND& sm) noexcept { smallend = sm; return *this; } - //! Redefine the small end of the Box. + //! Redefine the small end of the BoxND. AMREX_GPU_HOST_DEVICE - Box& setSmall (int dir, int sm_index) noexcept { smallend.setVal(dir,sm_index); return *this; } + BoxND& setSmall (int dir, int sm_index) noexcept { smallend.setVal(dir,sm_index); return *this; } - //! Redefine the big end of the Box. + //! Redefine the big end of the BoxND. AMREX_GPU_HOST_DEVICE - Box& setBig (const IntVect& bg) noexcept { bigend = bg; return *this; } + BoxND& setBig (const IntVectND& bg) noexcept { bigend = bg; return *this; } - //! Redefine the big end of the Box. + //! Redefine the big end of the BoxND. 
AMREX_GPU_HOST_DEVICE - Box& setBig (int dir, int bg_index) noexcept { bigend.setVal(dir,bg_index); return *this; } + BoxND& setBig (int dir, int bg_index) noexcept { bigend.setVal(dir,bg_index); return *this; } /** * \brief Set the entire range in a given direction, starting at * sm_index with length n_cells. NOTE: This will yield an - * illegal Box if n_cells <= 0. + * illegal BoxND if n_cells <= 0. */ AMREX_GPU_HOST_DEVICE - Box& setRange (int dir, + BoxND& setRange (int dir, int sm_index, int n_cells = 1) noexcept; //! Set indexing type AMREX_GPU_HOST_DEVICE - Box& setType (const IndexType& t) noexcept { btype = t; return *this; } + BoxND& setType (const IndexTypeND& t) noexcept { btype = t; return *this; } - //! Shift this Box nzones indexing positions in coordinate direction dir. + //! Shift this BoxND nzones indexing positions in coordinate direction dir. AMREX_GPU_HOST_DEVICE - Box& shift (int dir, int nzones) noexcept { smallend.shift(dir,nzones); bigend.shift(dir,nzones); return *this; } + BoxND& shift (int dir, int nzones) noexcept { smallend.shift(dir,nzones); bigend.shift(dir,nzones); return *this; } //! Equivalent to b.shift(0,iv[0]).shift(1,iv[1]) .... AMREX_GPU_HOST_DEVICE - Box& shift (const IntVect& iv) noexcept { smallend.shift(iv); bigend.shift(iv); return *this; } + BoxND& shift (const IntVectND& iv) noexcept { smallend.shift(iv); bigend.shift(iv); return *this; } /** - * \brief This member shifts the Box by "half" indices, thereby - * converting the Box from type CELL to NODE and visa-versa. + * \brief This member shifts the BoxND by "half" indices, thereby + * converting the BoxND from type CELL to NODE and visa-versa. * b.shiftHalf(0,1) shifts b to the right by 1/2 cells. * b.shiftHalf(1,-3) shifts b in the -j direction by 3/2 cells. * NOTE: If num_halfs is EVEN the shift is num_halfs/2 full @@ -485,64 +509,64 @@ public: * This is: b.shifthalf(4) == b.shift(2). */ AMREX_GPU_HOST_DEVICE - Box& shiftHalf (int dir, int num_halfs) noexcept; + BoxND& shiftHalf (int dir, int num_halfs) noexcept; //! Equivalent to b.shiftHalf(0,iv[0]).shiftHalf(1,iv[1]) ... AMREX_GPU_HOST_DEVICE - Box& shiftHalf (const IntVect& iv) noexcept; + BoxND& shiftHalf (const IntVectND& iv) noexcept; /** - * \brief Convert the Box from the current type into the - * argument type. This may change the Box coordinates: + * \brief Convert the BoxND from the current type into the + * argument type. This may change the BoxND coordinates: * type CELL -> NODE : increase coordinate by one on high end * type NODE -> CELL : reduce coordinate by one on high end * other type mappings make no change. */ AMREX_GPU_HOST_DEVICE - Box& convert (IndexType typ) noexcept; + BoxND& convert (IndexTypeND typ) noexcept; /** - * \brief Convert the Box from the current type into the - * argument type. This may change the Box coordinates: + * \brief Convert the BoxND from the current type into the + * argument type. This may change the BoxND coordinates: * type CELL -> NODE : increase coordinate by one on high end * type NODE -> CELL : reduce coordinate by one on high end * other type mappings make no change. */ AMREX_GPU_HOST_DEVICE - Box& convert (const IntVect& typ) noexcept; + BoxND& convert (const IntVectND& typ) noexcept; //! Convert to NODE type in all directions. AMREX_GPU_HOST_DEVICE - Box& surroundingNodes () noexcept; + BoxND& surroundingNodes () noexcept; //! Convert to NODE type in given direction. 
AMREX_GPU_HOST_DEVICE - Box& surroundingNodes (int dir) noexcept; + BoxND& surroundingNodes (int dir) noexcept; AMREX_GPU_HOST_DEVICE - Box& surroundingNodes (Direction d) noexcept { return surroundingNodes(static_cast(d)); } + BoxND& surroundingNodes (Direction d) noexcept { return surroundingNodes(static_cast(d)); } //! Convert to CELL type in all directions. AMREX_GPU_HOST_DEVICE - Box& enclosedCells () noexcept; + BoxND& enclosedCells () noexcept; //! Convert to CELL type in given direction. AMREX_GPU_HOST_DEVICE - Box& enclosedCells (int dir) noexcept; + BoxND& enclosedCells (int dir) noexcept; AMREX_GPU_HOST_DEVICE - Box& enclosedCells (Direction d) noexcept { return enclosedCells(static_cast(d)); } + BoxND& enclosedCells (Direction d) noexcept { return enclosedCells(static_cast(d)); } /** - * \brief Return Box that is intersection of this Box + * \brief Return BoxND that is intersection of this BoxND * and argument. The Boxes MUST be of same type. */ AMREX_GPU_HOST_DEVICE - Box operator& (const Box& rhs) const noexcept { Box lhs(*this); lhs &= rhs; return lhs; } + BoxND operator& (const BoxND& rhs) const noexcept { BoxND lhs(*this); lhs &= rhs; return lhs; } - //! Intersect this Box with its argument. The Boxes MUST be of the same type. + //! Intersect this BoxND with its argument. The Boxes MUST be of the same type. AMREX_GPU_HOST_DEVICE - Box& operator&= (const Box& rhs) noexcept + BoxND& operator&= (const BoxND& rhs) noexcept { BL_ASSERT(sameType(rhs)); smallend.max(rhs.smallend); @@ -551,12 +575,12 @@ public: } /** - * \brief Modify Box to that of the minimum Box containing both - * the original Box and the argument. + * \brief Modify BoxND to that of the minimum BoxND containing both + * the original BoxND and the argument. * Both Boxes must have identical type. */ AMREX_GPU_HOST_DEVICE - Box& minBox (const Box& b) noexcept { + BoxND& minBox (const BoxND& b) noexcept { // BoxArray may call this with not ok boxes. BL_ASSERT(b.ok() && ok()); BL_ASSERT(sameType(b)); smallend.min(b.smallend); @@ -564,82 +588,82 @@ public: return *this; } - //! Shift Box (relative) by given IntVect. + //! Shift BoxND (relative) by given IntVectND. AMREX_GPU_HOST_DEVICE - Box& operator+= (const IntVect& v) noexcept { smallend += v; bigend += v; return *this; } + BoxND& operator+= (const IntVectND& v) noexcept { smallend += v; bigend += v; return *this; } - //! Shift Box (relative) by given IntVect. + //! Shift BoxND (relative) by given IntVectND. AMREX_GPU_HOST_DEVICE - Box operator+ (const IntVect& v) const noexcept { Box r(*this); r += v; return r; } + BoxND operator+ (const IntVectND& v) const noexcept { BoxND r(*this); r += v; return r; } - //! Shift Box (relative) by given IntVect. + //! Shift BoxND (relative) by given IntVectND. AMREX_GPU_HOST_DEVICE - Box& operator-= (const IntVect& v) noexcept { smallend -= v; bigend -= v; return *this; } + BoxND& operator-= (const IntVectND& v) noexcept { smallend -= v; bigend -= v; return *this; } - //! Shift Box (relative) by given IntVect. + //! Shift BoxND (relative) by given IntVectND. AMREX_GPU_HOST_DEVICE - Box operator- (const IntVect& v) const noexcept { Box r(*this); r -= v; return r; } + BoxND operator- (const IntVectND& v) const noexcept { BoxND r(*this); r -= v; return r; } /** - * \brief Chop the Box at the chop_pnt in the dir direction - * returns one Box, modifies the object Box. - * The union of the two is the original Box. 
- * The modified Box is the low end, the returned Box + * \brief Chop the BoxND at the chop_pnt in the dir direction + * returns one BoxND, modifies the object BoxND. + * The union of the two is the original BoxND. + * The modified BoxND is the low end, the returned BoxND * is the high end. If type(dir) = CELL, the Boxes are disjoint - * with the chop_pnt included in the high end (new Box). - * It is an ERROR if chop_pnt is the low end of the orig Box. + * with the chop_pnt included in the high end (new BoxND). + * It is an ERROR if chop_pnt is the low end of the orig BoxND. * If type(dir) = NODE, the chop_pnt is included in both Boxes * but is the only point in common. It is also an error if the - * chop_pnt is an end node of the Box. + * chop_pnt is an end node of the BoxND. */ AMREX_GPU_HOST_DEVICE - Box chop (int dir, int chop_pnt) noexcept; + BoxND chop (int dir, int chop_pnt) noexcept; /* - * \brief Grow Box in all directions by given amount. - * NOTE: n_cell negative shrinks the Box by that number of cells. + * \brief Grow BoxND in all directions by given amount. + * NOTE: n_cell negative shrinks the BoxND by that number of cells. */ AMREX_GPU_HOST_DEVICE - Box& grow (int i) noexcept { smallend.diagShift(-i); bigend.diagShift(i); return *this; } + BoxND& grow (int i) noexcept { smallend.diagShift(-i); bigend.diagShift(i); return *this; } - //! Grow Box in each direction by specified amount. + //! Grow BoxND in each direction by specified amount. AMREX_GPU_HOST_DEVICE - Box& grow (const IntVect& v) noexcept { smallend -= v; bigend += v; return *this;} + BoxND& grow (const IntVectND& v) noexcept { smallend -= v; bigend += v; return *this;} /** - * \brief Grow the Box on the low and high end by n_cell cells + * \brief Grow the BoxND on the low and high end by n_cell cells * in direction idir. */ AMREX_GPU_HOST_DEVICE - Box& grow (int idir, int n_cell) noexcept { smallend.shift(idir, -n_cell); bigend.shift(idir, n_cell); return *this; } + BoxND& grow (int idir, int n_cell) noexcept { smallend.shift(idir, -n_cell); bigend.shift(idir, n_cell); return *this; } AMREX_GPU_HOST_DEVICE - Box& grow (Direction d, int n_cell) noexcept { return grow(static_cast(d), n_cell); } + BoxND& grow (Direction d, int n_cell) noexcept { return grow(static_cast(d), n_cell); } /** - * \brief Grow the Box on the low end by n_cell cells in direction idir. - * NOTE: n_cell negative shrinks the Box by that number of cells. + * \brief Grow the BoxND on the low end by n_cell cells in direction idir. + * NOTE: n_cell negative shrinks the BoxND by that number of cells. */ AMREX_GPU_HOST_DEVICE - Box& growLo (int idir, int n_cell = 1) noexcept { smallend.shift(idir, -n_cell); return *this; } + BoxND& growLo (int idir, int n_cell = 1) noexcept { smallend.shift(idir, -n_cell); return *this; } AMREX_GPU_HOST_DEVICE - Box& growLo (Direction d, int n_cell = 1) noexcept { return growLo(static_cast(d), n_cell); } + BoxND& growLo (Direction d, int n_cell = 1) noexcept { return growLo(static_cast(d), n_cell); } /** - * \brief Grow the Box on the high end by n_cell cells in - * direction idir. NOTE: n_cell negative shrinks the Box by that + * \brief Grow the BoxND on the high end by n_cell cells in + * direction idir. NOTE: n_cell negative shrinks the BoxND by that * number of cells. 
*/ AMREX_GPU_HOST_DEVICE - Box& growHi (int idir, int n_cell = 1) noexcept { bigend.shift(idir,n_cell); return *this; } + BoxND& growHi (int idir, int n_cell = 1) noexcept { bigend.shift(idir,n_cell); return *this; } AMREX_GPU_HOST_DEVICE - Box& growHi (Direction d, int n_cell = 1) noexcept { return growHi(static_cast(d), n_cell); } + BoxND& growHi (Direction d, int n_cell = 1) noexcept { return growHi(static_cast(d), n_cell); } //! Grow in the direction of the given face. AMREX_GPU_HOST_DEVICE - Box& grow (Orientation face, int n_cell = 1) noexcept { + BoxND& grow (Orientation face, int n_cell = 1) noexcept { int idir = face.coordDir(); if (face.isLow()) { smallend.shift(idir, -n_cell); @@ -650,80 +674,80 @@ public: } /** - * \brief Refine Box by given (positive) refinement ratio. + * \brief Refine BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo*ratio and * hi <- (hi+1)*ratio - 1. * NOTE: if type(dir) = NODE centered: lo <- lo*ratio and * hi <- hi*ratio. */ AMREX_GPU_HOST_DEVICE - Box& refine (int ref_ratio) noexcept { - return this->refine(IntVect(ref_ratio)); + BoxND& refine (int ref_ratio) noexcept { + return this->refine(IntVectND(ref_ratio)); } /* - * \brief Refine Box by given (positive) refinement ratio. + * \brief Refine BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo*ratio and * hi <- (hi+1)*ratio - 1. * NOTE: if type(dir) = NODE centered: lo <- lo*ratio and * hi <- hi*ratio. */ AMREX_GPU_HOST_DEVICE - Box& refine (const IntVect& ref_ratio) noexcept; + BoxND& refine (const IntVectND& ref_ratio) noexcept; /** - * \brief Coarsen Box by given (positive) refinement ratio. + * \brief Coarsen BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo/ratio and * hi <- hi/ratio. * NOTE: if type(dir) = NODE centered: lo <- lo/ratio and * hi <- hi/ratio + ((hi%ratio)==0 ? 0 : 1). - * That is, refinement of coarsened Box must contain - * the original Box. + * That is, refinement of coarsened BoxND must contain + * the original BoxND. */ AMREX_GPU_HOST_DEVICE - Box& coarsen (int ref_ratio) noexcept { - return this->coarsen(IntVect(ref_ratio)); + BoxND& coarsen (int ref_ratio) noexcept { + return this->coarsen(IntVectND(ref_ratio)); } /** - * \brief Coarsen Box by given (positive) refinement ratio. + * \brief Coarsen BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo/ratio and * hi <- hi/ratio. * NOTE: if type(dir) = NODE centered: lo <- lo/ratio and * hi <- hi/ratio + ((hi%ratio)==0 ? 0 : 1). - * That is, refinement of coarsened Box must contain - * the original Box. + * That is, refinement of coarsened BoxND must contain + * the original BoxND. */ AMREX_GPU_HOST_DEVICE - Box& coarsen (const IntVect& ref_ratio) noexcept; + BoxND& coarsen (const IntVectND& ref_ratio) noexcept; /** * \brief Step through the rectangle. It is a runtime error to give * a point not inside rectangle. Iteration may not be efficient. */ AMREX_GPU_HOST_DEVICE - void next (IntVect &) const noexcept; + void next (IntVectND &) const noexcept; /** * \brief This static member function returns a constant reference to - * an object of type Box representing the unit box in - * AMREX_SPACEDIM-dimensional space. + * an object of type BoxND representing the unit BoxND in + * dim-dimensional space. 
*/ AMREX_GPU_HOST_DEVICE - static Box TheUnitBox () noexcept { - return Box(IntVect::TheZeroVector(),IntVect::TheZeroVector()); + static BoxND TheUnitBox () noexcept { + return BoxND(IntVectND::TheZeroVector(),IntVectND::TheZeroVector()); } [[nodiscard]] AMREX_GPU_HOST_DEVICE bool isSquare() const noexcept; [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool coarsenable(const IntVect& refrat, const IntVect& min_width) const noexcept + bool coarsenable(const IntVectND& refrat, const IntVectND& min_width) const noexcept { if (!size().allGE(refrat*min_width)) { return false; } else { - Box testBox = *this; + BoxND testBox = *this; testBox.coarsen(refrat); testBox.refine (refrat); return (*this == testBox); @@ -732,19 +756,19 @@ public: [[nodiscard]] AMREX_GPU_HOST_DEVICE bool coarsenable(int refrat, int min_width=1) const noexcept { - return coarsenable(IntVect(refrat), IntVect(min_width)); + return coarsenable(IntVectND(refrat), IntVectND(min_width)); } [[nodiscard]] AMREX_GPU_HOST_DEVICE - bool coarsenable(const IntVect& refrat, int min_width=1) const noexcept + bool coarsenable(const IntVectND& refrat, int min_width=1) const noexcept { - return coarsenable(refrat, IntVect(min_width)); + return coarsenable(refrat, IntVectND(min_width)); } AMREX_GPU_HOST_DEVICE void normalize () noexcept { - for (int idim=0; idim < AMREX_SPACEDIM; ++idim) { + for (int idim=0; idim < dim; ++idim) { if (this->length(idim) == 0) { this->growHi(idim,1); } @@ -752,37 +776,80 @@ public: } AMREX_GPU_HOST_DEVICE - Box& makeSlab (int direction, int slab_index) noexcept + BoxND& makeSlab (int direction, int slab_index) noexcept { smallend[direction] = slab_index; bigend[direction] = slab_index; return *this; } - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 lbound (Box const& box) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 ubound (Box const& box) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 begin (Box const& box) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 end (Box const& box) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 length (Box const& box) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 max_lbound (Box const&, Box const&) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 max_lbound (Box const&, Dim3 const&) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 min_ubound (Box const&, Box const&) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Dim3 min_ubound (Box const&, Dim3 const&) noexcept; - AMREX_GPU_HOST_DEVICE friend AMREX_FORCE_INLINE Box minBox (Box const&, Box const&, IndexType) noexcept; + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr std::size_t ndims () noexcept { + return static_cast(dim); + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int indims () noexcept { + return dim; + } + + /** + * \brief Returns a new BoxND of dimension new_dim and + * assigns the first new_dim dimension of this BoxND to it. + */ + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + BoxND shrink () const noexcept { + static_assert(new_dim <= dim); + auto lo = smallend.template shrink(); + auto hi = bigend.template shrink(); + auto typ = btype.template shrink(); + return BoxND(lo, hi, typ); + } + + /** + * \brief Returns a new BoxND of size new_dim and + * assigns all values of this BoxND to it and + * (small=0, big=0, typ=CELL) to the remaining elements. 
+ */ + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + BoxND expand () const noexcept { + static_assert(new_dim >= dim); + auto lo = smallend.template expand(0); + auto hi = bigend.template expand(0); + auto typ = btype.template expand(IndexType::CellIndex::CELL); + return BoxND(lo, hi, typ); + } + + /** + * \brief Returns a new BoxND of size new_dim + * by either shrinking or expanding this BoxND + */ + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + BoxND resize () const noexcept { + if constexpr (new_dim > dim) { + return expand(); + } else { + return shrink(); + } + } private: - IntVect smallend; - IntVect bigend; - IndexType btype; + IntVectND smallend; + IntVectND bigend; + IndexTypeND btype; }; +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::refine (const IntVect& ref_ratio) noexcept +BoxND& +BoxND::refine (const IntVectND& ref_ratio) noexcept { if (ref_ratio != 1) { - IntVect shft(1); + IntVectND shft(1); shft -= btype.ixType(); smallend *= ref_ratio; bigend += shft; @@ -792,10 +859,11 @@ Box::refine (const IntVect& ref_ratio) noexcept return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::coarsen (const IntVect& ref_ratio) noexcept +BoxND& +BoxND::coarsen (const IntVectND& ref_ratio) noexcept { if (ref_ratio != 1) { @@ -803,8 +871,8 @@ Box::coarsen (const IntVect& ref_ratio) noexcept if (btype.any()) { - IntVect off(0); - for (int dir = 0; dir < AMREX_SPACEDIM; dir++) + IntVectND off(0); + for (int dir = 0; dir < dim; dir++) { if (btype[dir]) { if (bigend[dir]%ref_ratio[dir]) { @@ -824,38 +892,41 @@ Box::coarsen (const IntVect& ref_ratio) noexcept return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::convert (const IntVect& typ) noexcept +BoxND& +BoxND::convert (const IntVectND& typ) noexcept { BL_ASSERT(typ.allGE(0) && typ.allLE(1)); - IntVect shft(typ - btype.ixType()); + IntVectND shft(typ - btype.ixType()); bigend += shft; - btype = IndexType(typ); + btype = IndexTypeND(typ); return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::convert (IndexType t) noexcept +BoxND& +BoxND::convert (IndexTypeND t) noexcept { - for (int dir = 0; dir < AMREX_SPACEDIM; dir++) + for (int dir = 0; dir < dim; dir++) { const auto typ = t[dir]; const auto bitval = btype[dir]; const int off = typ - bitval; bigend.shift(dir,off); - btype.setType(dir, (IndexType::CellIndex) typ); + btype.setType(dir, static_cast(typ)); } return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::surroundingNodes (int dir) noexcept +BoxND& +BoxND::surroundingNodes (int dir) noexcept { if (!(btype[dir])) { @@ -868,12 +939,13 @@ Box::surroundingNodes (int dir) noexcept return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::surroundingNodes () noexcept +BoxND& +BoxND::surroundingNodes () noexcept { - for (int i = 0; i < AMREX_SPACEDIM; ++i) { + for (int i = 0; i < dim; ++i) { if ((btype[i] == 0)) { bigend.shift(i,1); } @@ -882,10 +954,11 @@ Box::surroundingNodes () noexcept return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::enclosedCells (int dir) noexcept +BoxND& +BoxND::enclosedCells (int dir) noexcept { if (btype[dir]) { @@ -898,12 +971,13 @@ Box::enclosedCells (int dir) noexcept return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::enclosedCells () noexcept +BoxND& +BoxND::enclosedCells () noexcept { - for (int i = 0 ; i < AMREX_SPACEDIM; ++i) { + for (int i = 0 ; i < dim; ++i) { if (btype[i]) { 
bigend.shift(i,-1); } @@ -912,78 +986,64 @@ Box::enclosedCells () noexcept return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Long -Box::index (const IntVect& v) const noexcept -{ - Long result = v[0]-smallend[0]; -#if AMREX_SPACEDIM==2 - result += length(0)*Long(v[1]-smallend[1]); -#elif AMREX_SPACEDIM==3 - result += length(0)*((v[1]-smallend[1]) - +Long(v[2]-smallend[2])*length(1)); -#endif +BoxND::index (const IntVectND& v) const noexcept +{ + IntVectND vz = v - smallend; + Long result = vz[0]; + Long mult = length(0); + for (int i = 1 ; i < dim; ++i) { + result += mult * vz[i]; + mult *= length(i); + } return result; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -IntVect -Box::atOffset (Long offset) const noexcept -{ -#if (AMREX_SPACEDIM == 1) - return IntVect{static_cast(offset+smallend[0])}; -#elif (AMREX_SPACEDIM == 2) - int xlen = bigend[0]-smallend[0]+1; - Long j = offset / xlen; - Long i = offset - j*xlen; - return IntVect{static_cast(i+smallend[0]), - static_cast(j+smallend[1])}; -#elif (AMREX_SPACEDIM == 3) - int xlen = bigend[0]-smallend[0]+1; - int ylen = bigend[1]-smallend[1]+1; - Long k = offset / (xlen*ylen); - Long j = (offset - k*(xlen*ylen)) / xlen; - Long i = (offset - k*(xlen*ylen)) - j*xlen; - return IntVect{static_cast(i+smallend[0]), - static_cast(j+smallend[1]), - static_cast(k+smallend[2])}; -#endif +IntVectND +BoxND::atOffset (Long offset) const noexcept +{ + IntVectND result = smallend; + + if constexpr (dim > 1) { + GpuArray mult{}; + mult[0] = length(0); + for (int i = 1 ; i < dim-1; ++i) { + mult[i] = mult[i-1] * length(i); + } + for (int i = dim-1 ; i > 0; --i) { + Long idx = offset / mult[i-1]; + offset -= idx * mult[i-1]; + result[i] += static_cast(idx); + } + } + + result[0] += static_cast(offset); + + return result; } +template +template > AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE GpuArray -Box::atOffset3d (Long offset) const noexcept -{ -#if (AMREX_SPACEDIM == 1) - return {{static_cast(offset+smallend[0]), - static_cast(0), - static_cast(0)}}; -#elif (AMREX_SPACEDIM == 2) - int xlen = bigend[0]-smallend[0]+1; - Long j = offset / xlen; - Long i = offset - j*xlen; - return {{static_cast(i+smallend[0]), - static_cast(j+smallend[1]), - static_cast(0)}}; -#elif (AMREX_SPACEDIM == 3) - int xlen = bigend[0]-smallend[0]+1; - int ylen = bigend[1]-smallend[1]+1; - Long k = offset / (xlen*ylen); - Long j = (offset - k*(xlen*ylen)) / xlen; - Long i = (offset - k*(xlen*ylen)) - j*xlen; - return {{static_cast(i+smallend[0]), - static_cast(j+smallend[1]), - static_cast(k+smallend[2])}}; -#endif +BoxND::atOffset3d (Long offset) const noexcept +{ + Dim3 iv3d = atOffset(offset).dim3(0); + return {{iv3d.x, iv3d.y, iv3d.z}}; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box& -Box::setRange (int dir, +BoxND& +BoxND::setRange (int dir, int sm_index, int n_cells) noexcept { @@ -992,93 +1052,92 @@ Box::setRange (int dir, return *this; } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -Box::next (IntVect& p) const noexcept // NOLINT(readability-convert-member-functions-to-static) +BoxND::next (IntVectND& p) const noexcept // NOLINT(readability-convert-member-functions-to-static) { BL_ASSERT(contains(p)); ++p[0]; -#if (AMREX_SPACEDIM >= 2) - if (p[0] > bigend[0]) - { - p[0] = smallend[0]; - ++p[1]; -#if (AMREX_SPACEDIM == 3) - if (p[1] > bigend[1]) - { - p[1] = smallend[1]; - ++p[2]; + for (int i = 0 ; i < dim-1; ++i) { + if (p[i] > bigend[i]) { + p[i] = smallend[i]; + ++p[i+1]; + } else { + break; } -#endif } -#endif } +template 
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool -Box::isSquare () const noexcept // NOLINT(readability-convert-member-functions-to-static) -{ -#if AMREX_SPACEDIM==1 - return false; // can't build a square in 1-D -#elif AMREX_SPACEDIM==2 - const IntVect& sz = this->size(); - return (sz[0] == sz[1]); -#elif AMREX_SPACEDIM==3 - const IntVect& sz = this->size(); - return (sz[0] == sz[1] && (sz[1] == sz[2])); -#endif +BoxND::isSquare () const noexcept // NOLINT(readability-convert-member-functions-to-static) +{ + if constexpr (dim == 1) { + return false; // can't build a square in 1-D + } else { + bool is_square = true; + const IntVectND& sz = this->size(); + for (int i = 0 ; i < dim-1; ++i) { + is_square = is_square && (sz[i] == sz[i+1]); + } + return is_square; + } } // -// Modified Box is low end, returned Box is high end. +// Modified BoxND is low end, returned BoxND is high end. // If CELL: chop_pnt included in high end. // If NODE: chop_pnt included in both Boxes. // +template AMREX_GPU_HOST_DEVICE inline -Box -Box::chop (int dir, int chop_pnt) noexcept +BoxND +BoxND::chop (int dir, int chop_pnt) noexcept { // - // Define new high end Box including chop_pnt. + // Define new high end BoxND including chop_pnt. // - IntVect sm(smallend); - IntVect bg(bigend); + IntVectND sm(smallend); + IntVectND bg(bigend); sm.setVal(dir,chop_pnt); if (btype[dir]) { // - // NODE centered Box. + // NODE centered BoxND. // BL_ASSERT(chop_pnt > smallend[dir] && chop_pnt < bigend[dir]); // - // Shrink original Box to just contain chop_pnt. + // Shrink original BoxND to just contain chop_pnt. // bigend.setVal(dir,chop_pnt); } else { // - // CELL centered Box. + // CELL centered BoxND. // BL_ASSERT(chop_pnt > smallend[dir] && chop_pnt <= bigend[dir]); // - // Shrink original Box to one below chop_pnt. + // Shrink original BoxND to one below chop_pnt. // bigend.setVal(dir,chop_pnt-1); } - return Box(sm,bg,btype); + return BoxND(sm,bg,btype); } +template AMREX_GPU_HOST_DEVICE inline -Box& -Box::shiftHalf (int dir, int num_halfs) noexcept +BoxND& +BoxND::shiftHalf (int dir, int num_halfs) noexcept { const int nbit = (num_halfs<0 ? -num_halfs : num_halfs)%2; int nshift = num_halfs/2; @@ -1099,12 +1158,13 @@ Box::shiftHalf (int dir, int num_halfs) noexcept return *this; } +template AMREX_GPU_HOST_DEVICE inline -Box& -Box::shiftHalf (const IntVect& iv) noexcept +BoxND& +BoxND::shiftHalf (const IntVectND& iv) noexcept { - for (int i = 0; i < AMREX_SPACEDIM; i++) { + for (int i = 0; i < dim; i++) { shiftHalf(i,iv[i]); } return *this; @@ -1140,308 +1200,332 @@ public: void AllGatherBoxes (Vector& bxs, int n_extra_reserve=0); /** - * \brief Grow Box in all directions by given amount. + * \brief Grow BoxND in all directions by given amount. - * NOTE: n_cell negative shrinks the Box by that number of cells. + * NOTE: n_cell negative shrinks the BoxND by that number of cells. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box grow (const Box& b, int i) noexcept +BoxND grow (const BoxND& b, int i) noexcept { - Box result = b; + BoxND result = b; result.grow(i); return result; } - //! Grow Box in each direction by specified amount. + //! Grow BoxND in each direction by specified amount. +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box grow (const Box& b, const IntVect& v) noexcept +BoxND grow (const BoxND& b, const IntVectND& v) noexcept { - Box result = b; + BoxND result = b; result.grow(v); return result; } - //! Grow Box in direction idir be n_cell cells + //! 
Grow BoxND in direction idir be n_cell cells +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box grow (const Box& b, int idir, int n_cell) noexcept +BoxND grow (const BoxND& b, int idir, int n_cell) noexcept { - Box result = b; + BoxND result = b; result.grow(idir, n_cell); return result; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box grow (const Box& b, Direction d, int n_cell) noexcept +BoxND grow (const BoxND& b, Direction d, int n_cell) noexcept { return grow(b, static_cast(d), n_cell); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box growLo (const Box& b, int idir, int n_cell) noexcept +BoxND growLo (const BoxND& b, int idir, int n_cell) noexcept { - Box result = b; + BoxND result = b; result.growLo(idir, n_cell); return result; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box growLo (const Box& b, Direction d, int n_cell) noexcept +BoxND growLo (const BoxND& b, Direction d, int n_cell) noexcept { return growLo(b, static_cast(d), n_cell); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box growHi (const Box& b, int idir, int n_cell) noexcept +BoxND growHi (const BoxND& b, int idir, int n_cell) noexcept { - Box result = b; + BoxND result = b; result.growHi(idir, n_cell); return result; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box growHi (const Box& b, Direction d, int n_cell) noexcept +BoxND growHi (const BoxND& b, Direction d, int n_cell) noexcept { return growHi(b, static_cast(d), n_cell); } /** - * \brief Coarsen Box by given (positive) refinement ratio. + * \brief Coarsen BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo/ratio and * hi <- hi/ratio. * NOTE: if type(dir) = NODE centered: lo <- lo/ratio and * hi <- hi/ratio + ((hi%ratio)==0 ? 0 : 1). - * That is, refinement of coarsened Box must contain - * the original Box. + * That is, refinement of coarsened BoxND must contain + * the original BoxND. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box coarsen (const Box& b, int ref_ratio) noexcept +BoxND coarsen (const BoxND& b, int ref_ratio) noexcept { - Box result = b; - result.coarsen(IntVect(ref_ratio)); + BoxND result = b; + result.coarsen(IntVectND(ref_ratio)); return result; } /** - * \brief Coarsen Box by given (positive) refinement ratio. + * \brief Coarsen BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo/ratio and * hi <- hi/ratio. * NOTE: if type(dir) = NODE centered: lo <- lo/ratio and * hi <- hi/ratio + ((hi%ratio)==0 ? 0 : 1). - * That is, refinement of coarsened Box must contain - * the original Box. + * That is, refinement of coarsened BoxND must contain + * the original BoxND. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box coarsen (const Box& b, const IntVect& ref_ratio) noexcept +BoxND coarsen (const BoxND& b, const IntVectND& ref_ratio) noexcept { - Box result = b; + BoxND result = b; result.coarsen(ref_ratio); return result; } /** - * Refine Box by given (positive) refinement ratio. + * Refine BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo*ratio and * hi <- (hi+1)*ratio - 1. * NOTE: if type(dir) = NODE centered: lo <- lo*ratio and * hi <- hi*ratio. 
*/ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box refine (const Box& b, int ref_ratio) noexcept +BoxND refine (const BoxND& b, int ref_ratio) noexcept { - Box result = b; - result.refine(IntVect(ref_ratio)); + BoxND result = b; + result.refine(IntVectND(ref_ratio)); return result; } /** - * \brief Refine Box by given (positive) refinement ratio. + * \brief Refine BoxND by given (positive) refinement ratio. * NOTE: if type(dir) = CELL centered: lo <- lo*ratio and * hi <- (hi+1)*ratio - 1. * NOTE: if type(dir) = NODE centered: lo <- lo*ratio and * hi <- hi*ratio. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box refine (const Box& b, const IntVect& ref_ratio) noexcept +BoxND refine (const BoxND& b, const IntVectND& ref_ratio) noexcept { - Box result = b; + BoxND result = b; result.refine(ref_ratio); return result; } - //! Return a Box with indices shifted by nzones in dir direction. + //! Return a BoxND with indices shifted by nzones in dir direction. +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box shift (const Box& b, int dir, int nzones) noexcept +BoxND shift (const BoxND& b, int dir, int nzones) noexcept { - Box result = b; + BoxND result = b; result.shift(dir, nzones); return result; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box shift (const Box& b, const IntVect& nzones) noexcept +BoxND shift (const BoxND& b, const IntVectND& nzones) noexcept { - Box result = b; + BoxND result = b; result.shift(nzones); return result; } /** - * \brief Returns a Box with NODE based coordinates in direction dir - * that encloses Box b. NOTE: equivalent to b.convert(dir,NODE) + * \brief Returns a BoxND with NODE based coordinates in direction dir + * that encloses BoxND b. NOTE: equivalent to b.convert(dir,NODE) * NOTE: error if b.type(dir) == NODE. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box surroundingNodes (const Box& b, int dir) noexcept +BoxND surroundingNodes (const BoxND& b, int dir) noexcept { - Box bx(b); + BoxND bx(b); bx.surroundingNodes(dir); return bx; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box surroundingNodes (const Box& b, Direction d) noexcept +BoxND surroundingNodes (const BoxND& b, Direction d) noexcept { return surroundingNodes(b, static_cast(d)); } /** - * \brief Returns a Box with NODE based coordinates in all - * directions that encloses Box b. + * \brief Returns a BoxND with NODE based coordinates in all + * directions that encloses BoxND b. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box surroundingNodes (const Box& b) noexcept +BoxND surroundingNodes (const BoxND& b) noexcept { - Box bx(b); + BoxND bx(b); bx.surroundingNodes(); return bx; } - //! Returns a Box with different type + //! Returns a BoxND with different type +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box convert (const Box& b, const IntVect& typ) noexcept +BoxND convert (const BoxND& b, const IntVectND& typ) noexcept { - Box bx(b); + BoxND bx(b); bx.convert(typ); return bx; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box convert (const Box& b, const IndexType& typ) noexcept +BoxND convert (const BoxND& b, const IndexTypeND& typ) noexcept { - Box bx(b); + BoxND bx(b); bx.convert(typ); return bx; } /** - * \brief Returns a Box with CELL based coordinates in + * \brief Returns a BoxND with CELL based coordinates in * direction dir that is enclosed by b. 
* NOTE: equivalent to b.convert(dir,CELL) * NOTE: error if b.type(dir) == CELL. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box enclosedCells (const Box& b, int dir) noexcept +BoxND enclosedCells (const BoxND& b, int dir) noexcept { - Box bx(b); + BoxND bx(b); bx.enclosedCells(dir); return bx; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box enclosedCells (const Box& b, Direction d) noexcept +BoxND enclosedCells (const BoxND& b, Direction d) noexcept { return enclosedCells(b, static_cast(d)); } /** - * \brief Returns a Box with CELL based coordinates in all + * \brief Returns a BoxND with CELL based coordinates in all * directions that is enclosed by b. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box enclosedCells (const Box& b) noexcept +BoxND enclosedCells (const BoxND& b) noexcept { - Box bx(b); + BoxND bx(b); bx.enclosedCells(); return bx; } /** - * \brief Returns the edge-centered Box (in direction dir) defining - * the low side of Box b. + * \brief Returns the edge-centered BoxND (in direction dir) defining + * the low side of BoxND b. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box bdryLo (const Box& b, int dir, int len=1) noexcept +BoxND bdryLo (const BoxND& b, int dir, int len=1) noexcept { - IntVect low(b.smallEnd()); - IntVect hi(b.bigEnd()); + IntVectND low(b.smallEnd()); + IntVectND hi(b.bigEnd()); int sm = low[dir]; low.setVal(dir,sm-len+1); hi.setVal(dir,sm); // // set dir'th bit to 1 = IndexType::NODE. // - IndexType typ(b.ixType()); + IndexTypeND typ(b.ixType()); typ.set(dir); - return Box(low,hi,typ); + return BoxND(low,hi,typ); } /** - * \brief Returns the edge-centered Box (in direction dir) defining - * the high side of Box b. + * \brief Returns the edge-centered BoxND (in direction dir) defining + * the high side of BoxND b. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box bdryHi (const Box& b, int dir, int len=1) noexcept +BoxND bdryHi (const BoxND& b, int dir, int len=1) noexcept { - IntVect low(b.smallEnd()); - IntVect hi(b.bigEnd()); + IntVectND low(b.smallEnd()); + IntVectND hi(b.bigEnd()); auto const bitval = b.type()[dir]; int bg = hi[dir] + 1 - bitval%2; low.setVal(dir,bg); @@ -1449,23 +1533,24 @@ Box bdryHi (const Box& b, int dir, int len=1) noexcept // // Set dir'th bit to 1 = IndexType::NODE. // - IndexType typ(b.ixType()); + IndexTypeND typ(b.ixType()); typ.set(dir); - return Box(low,hi,typ); + return BoxND(low,hi,typ); } /** * \brief Similar to bdryLo and bdryHi except that it operates on the * given face of box b. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box bdryNode (const Box& b, Orientation face, int len=1) noexcept +BoxND bdryNode (const BoxND& b, Orientation face, int len=1) noexcept { int dir = face.coordDir(); - IntVect low(b.smallEnd()); - IntVect hi(b.bigEnd()); + IntVectND low(b.smallEnd()); + IntVectND hi(b.bigEnd()); if (face.isLow()) { int sm = low[dir]; @@ -1482,51 +1567,53 @@ Box bdryNode (const Box& b, Orientation face, int len=1) noexcept // // Set dir'th bit to 1 = IndexType::NODE. // - IndexType typ(b.ixType()); + IndexTypeND typ(b.ixType()); typ.set(dir); - return Box(low,hi,typ); + return BoxND(low,hi,typ); } /** - * \brief Returns the cell centered Box of length len adjacent + * \brief Returns the cell centered BoxND of length len adjacent * to b on the low end along the coordinate direction dir. - * The return Box is identical to b in the other directions. 
- * The return Box and b have an empty intersection. + * The return BoxND is identical to b in the other directions. + * The return BoxND and b have an empty intersection. * NOTE: len >= 1 - * NOTE: Box retval = b.adjCellLo(b,dir,len) + * NOTE: BoxND retval = b.adjCellLo(b,dir,len) * is equivalent to the following set of operations: - * Box retval(b); - * retval.convert(dir,Box::CELL); + * BoxND retval(b); + * retval.convert(dir,BoxND::CELL); * retval.setrange(dir,retval.smallEnd(dir)-len,len); */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box adjCellLo (const Box& b, int dir, int len=1) noexcept +BoxND adjCellLo (const BoxND& b, int dir, int len=1) noexcept { BL_ASSERT(len > 0); - IntVect low(b.smallEnd()); - IntVect hi(b.bigEnd()); + IntVectND low(b.smallEnd()); + IntVectND hi(b.bigEnd()); int sm = low[dir]; low.setVal(dir,sm - len); hi.setVal(dir,sm - 1); // // Set dir'th bit to 0 = IndexType::CELL. // - IndexType typ(b.ixType()); + IndexTypeND typ(b.ixType()); typ.unset(dir); - return Box(low,hi,typ); + return BoxND(low,hi,typ); } - //! Similar to adjCellLo but builds an adjacent Box on the high end. + //! Similar to adjCellLo but builds an adjacent BoxND on the high end. +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box adjCellHi (const Box& b, int dir, int len=1) noexcept +BoxND adjCellHi (const BoxND& b, int dir, int len=1) noexcept { BL_ASSERT(len > 0); - IntVect low(b.smallEnd()); - IntVect hi(b.bigEnd()); + IntVectND low(b.smallEnd()); + IntVectND hi(b.bigEnd()); int bitval = b.type()[dir]; int bg = hi[dir] + 1 - bitval%2; low.setVal(dir,bg); @@ -1534,20 +1621,21 @@ Box adjCellHi (const Box& b, int dir, int len=1) noexcept // // Set dir'th bit to 0 = IndexType::CELL. // - IndexType typ(b.ixType()); + IndexTypeND typ(b.ixType()); typ.unset(dir); - return Box(low,hi,typ); + return BoxND(low,hi,typ); } //! Similar to adjCellLo and adjCellHi; operates on given face. +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box adjCell (const Box& b, Orientation face, int len=1) noexcept +BoxND adjCell (const BoxND& b, Orientation face, int len=1) noexcept { BL_ASSERT(len > 0); - IntVect low(b.smallEnd()); - IntVect hi(b.bigEnd()); + IntVectND low(b.smallEnd()); + IntVectND hi(b.bigEnd()); int dir = face.coordDir(); if (face.isLow()) { @@ -1565,225 +1653,310 @@ Box adjCell (const Box& b, Orientation face, int len=1) noexcept // // Set dir'th bit to 0 = IndexType::CELL. // - IndexType typ(b.ixType()); + IndexTypeND typ(b.ixType()); typ.unset(dir); - return Box(low,hi,typ); + return BoxND(low,hi,typ); } /** - * \brief Modify Box to that of the minimum Box containing both - * the original Box and the argument. - * Both Boxes must have identical type. + * \brief Modify BoxND to that of the minimum BoxND containing both + * the original BoxND and the argument. + * Both BoxNDes must have identical type. */ +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box minBox (const Box& b1, const Box& b2) noexcept +BoxND minBox (const BoxND& b1, const BoxND& b2) noexcept { - Box result = b1; + BoxND result = b1; result.minBox(b2); return result; } - //! Write an ASCII representation to the ostream. 
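Illustrative sketch, not part of the patch: the hunk above generalizes the free-function Box helpers (grow, growLo/growHi, coarsen, refine, shift, surroundingNodes, enclosedCells, bdryLo/bdryHi, adjCell*, minBox) from the fixed-dimension Box to the dimension-templated BoxND. The template parameter lists were stripped when this diff was rendered; the sketch below assumes the conventional spelling, i.e. template <int dim> functions taking BoxND<dim> and IntVectND<dim>, with amrex::Box remaining an alias for the AMREX_SPACEDIM instantiation. Usage on a 3D build would then look like:

    #include <AMReX_Box.H>

    amrex::Long box_helpers_demo ()
    {
        // Cell-centered 64^3 box; amrex::Box is assumed to stay a BoxND<AMREX_SPACEDIM> alias.
        amrex::Box cc(amrex::IntVect(0,0,0), amrex::IntVect(63,63,63));

        amrex::Box grown = amrex::grow(cc, 0, 2);        // two extra cells on both ends in x
        amrex::Box crse  = amrex::coarsen(cc, 2);        // (0,0,0)-(31,31,31)
        amrex::Box fine  = amrex::refine(crse, 2);       // refining the coarsened box recovers cc
        amrex::Box nodal = amrex::surroundingNodes(cc);  // (0,0,0)-(64,64,64), node-centered

        return grown.numPts() + fine.numPts() + nodal.numPts();
    }
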
-std::ostream& operator<< (std::ostream& os, const Box& bx); +namespace detail { + std::ostream& box_write (std::ostream& os, const int * smallend, const int * bigend, + const int * type, int dim); + std::istream& box_read (std::istream& is, int * smallend, int * bigend, int * type, int dim); + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr + auto BoxSplit_imp (std::index_sequence, + const T& lo, const T& hi, const U& typ) noexcept { + return makeTuple(BoxND(get(lo), get(hi), get(typ))...); + } +} + +//! Write an ASCII representation to the ostream. +template +std::ostream& operator<< (std::ostream& os, const BoxND& bx) +{ + IntVectND type = bx.type(); + return detail::box_write(os, bx.smallEnd().begin(), bx.bigEnd().begin(), type.begin(), dim); +} + +//! Read from istream. +template +std::istream& operator>> (std::istream& is, BoxND& bx) { + IntVectND small; + IntVectND big; + IntVectND type; + detail::box_read(is, small.begin(), big.begin(), type.begin(), dim); + bx = BoxND{small, big, type}; + return is; +} + +/** +* \brief Returns a BoxND obtained by concatenating the input BoxNDs. +* The dimension of the return value equals the sum of the dimensions of the inputted BoxNDs. +*/ +template +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +constexpr BoxND()> +BoxCat (const BoxND& bx, const BoxND&...boxes) noexcept { + auto lo = IntVectCat(bx.smallEnd(), boxes.smallEnd()...); + auto hi = IntVectCat(bx.bigEnd(), boxes.bigEnd()...); + auto typ = IndexTypeCat(bx.ixType(), boxes.ixType()...); + return BoxND()>{lo, hi, typ}; +} + +/** +* \brief Returns a tuple of BoxNDs obtained by splitting the input BoxND +* according to the dimensions specified by the template arguments. +*/ +template +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +constexpr GpuTuple, BoxND...> +BoxSplit (const BoxND()>& bx) noexcept { + auto lo = IntVectSplit(bx.smallEnd()); + auto hi = IntVectSplit(bx.bigEnd()); + auto typ = IndexTypeSplit(bx.ixType()); + return detail::BoxSplit_imp(std::make_index_sequence<1 + sizeof...(dims)>(), lo, hi, typ); +} + +/** +* \brief Returns a new BoxND of dimension new_dim and +* assigns the first new_dim dimension of this BoxND to it. +*/ +template +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +constexpr BoxND +BoxShrink (const BoxND& bx) noexcept { + return bx.template shrink(); +} + +/** +* \brief Returns a new BoxND of size new_dim and +* assigns all values of this BoxND to it and +* (small=0, big=0, typ=CELL) to the remaining elements. +*/ +template +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +constexpr BoxND +BoxExpand (const BoxND& bx) noexcept { + return bx.template expand(); +} - //! Read from istream. 
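Illustrative sketch, not part of the patch: the hunk above adds BoxCat, BoxSplit, BoxShrink, BoxExpand and BoxResize for converting between BoxND objects of different dimensions. Because the template argument lists are garbled in this rendering, the call syntax below is an assumption reconstructed from the surrounding IntVectCat/IntVectSplit usage rather than a verbatim quote of the patch:

    #include <AMReX_Box.H>
    #include <AMReX_Tuple.H>

    void box_cat_split_demo ()
    {
        amrex::BoxND<2> bxy(amrex::IntVectND<2>(0,0), amrex::IntVectND<2>(15,15));
        amrex::BoxND<1> bz (amrex::IntVectND<1>(0),   amrex::IntVectND<1>(31));

        // Concatenate a 2D and a 1D box into a 3D box: (0,0,0)-(15,15,31).
        auto b3 = amrex::BoxCat(bxy, bz);

        // Split it back into its 2D and 1D pieces.
        auto pieces = amrex::BoxSplit<2,1>(b3);
        amrex::BoxND<2> b2 = amrex::get<0>(pieces);
        amrex::BoxND<1> b1 = amrex::get<1>(pieces);

        AMREX_ALWAYS_ASSERT(b2 == bxy && b1 == bz);
    }
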
-std::istream& operator>> (std::istream& is, Box& bx); +/** +* \brief Returns a new BoxND of size new_dim +* by either shrinking or expanding this BoxND +*/ +template +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +constexpr BoxND +BoxResize (const BoxND& bx) noexcept { + return bx.template resize(); +} +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 lbound (Box const& box) noexcept +IntVectND lbound_iv (BoxND const& box) noexcept { -#if (AMREX_SPACEDIM == 1) - return {box.smallend[0], 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {box.smallend[0], box.smallend[1], 0}; -#elif (AMREX_SPACEDIM == 3) - return {box.smallend[0], box.smallend[1], box.smallend[2]}; -#endif + return box.smallEnd(); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 ubound (Box const& box) noexcept +IntVectND ubound_iv (BoxND const& box) noexcept { -#if (AMREX_SPACEDIM == 1) - return {box.bigend[0], 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {box.bigend[0], box.bigend[1], 0}; -#elif (AMREX_SPACEDIM == 3) - return {box.bigend[0], box.bigend[1], box.bigend[2]}; -#endif + return box.bigEnd(); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 begin (Box const& box) noexcept +IntVectND begin_iv (BoxND const& box) noexcept { -#if (AMREX_SPACEDIM == 1) - return {box.smallend[0], 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {box.smallend[0], box.smallend[1], 0}; -#elif (AMREX_SPACEDIM == 3) - return {box.smallend[0], box.smallend[1], box.smallend[2]}; -#endif + return box.smallEnd(); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 end (Box const& box) noexcept +IntVectND end_iv (BoxND const& box) noexcept { -#if (AMREX_SPACEDIM == 1) - return {box.bigend[0]+1, 1, 1}; -#elif (AMREX_SPACEDIM == 2) - return {box.bigend[0]+1, box.bigend[1]+1, 1}; -#elif (AMREX_SPACEDIM == 3) - return {box.bigend[0]+1, box.bigend[1]+1, box.bigend[2]+1}; -#endif + return box.bigEnd() + 1; } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 length (Box const& box) noexcept +IntVectND length_iv (BoxND const& box) noexcept { -#if (AMREX_SPACEDIM == 1) - return {box.bigend[0]-box.smallend[0]+1, 1, 1}; -#elif (AMREX_SPACEDIM == 2) - return {box.bigend[0]-box.smallend[0]+1, - box.bigend[1]-box.smallend[1]+1, 1}; -#elif (AMREX_SPACEDIM == 3) - return {box.bigend[0]-box.smallend[0]+1, - box.bigend[1]-box.smallend[1]+1, - box.bigend[2]-box.smallend[2]+1}; -#endif + return box.bigEnd() - box.smallEnd() + 1; } // Max of lower bound +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 max_lbound (Box const& b1, Box const& b2) noexcept +IntVectND max_lbound_iv (BoxND const& b1, BoxND const& b2) noexcept { -#if (AMREX_SPACEDIM == 1) - return {amrex::max(b1.smallend[0], b2.smallend[0]), - 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {amrex::max(b1.smallend[0], b2.smallend[0]), - amrex::max(b1.smallend[1], b2.smallend[1]), - 0}; -#elif (AMREX_SPACEDIM == 3) - return {amrex::max(b1.smallend[0], b2.smallend[0]), - amrex::max(b1.smallend[1], b2.smallend[1]), - amrex::max(b1.smallend[2], b2.smallend[2])}; -#endif + return max(b1.smallEnd(), b2.smallEnd()); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 max_lbound (Box const& b1, Dim3 const& lo) noexcept +IntVectND max_lbound_iv (BoxND const& b1, IntVectND const& lo) noexcept { -#if (AMREX_SPACEDIM == 1) - return {amrex::max(b1.smallend[0], lo.x), - 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {amrex::max(b1.smallend[0], lo.x), - amrex::max(b1.smallend[1], lo.y), 
- 0}; -#elif (AMREX_SPACEDIM == 3) - return {amrex::max(b1.smallend[0], lo.x), - amrex::max(b1.smallend[1], lo.y), - amrex::max(b1.smallend[2], lo.z)}; -#endif + return max(b1.smallEnd(), lo); } // Min of upper bound +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 min_ubound (Box const& b1, Box const& b2) noexcept +IntVectND min_ubound_iv (BoxND const& b1, BoxND const& b2) noexcept { -#if (AMREX_SPACEDIM == 1) - return {amrex::min(b1.bigend[0], b2.bigend[0]), - 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {amrex::min(b1.bigend[0], b2.bigend[0]), - amrex::min(b1.bigend[1], b2.bigend[1]), - 0}; -#elif (AMREX_SPACEDIM == 3) - return {amrex::min(b1.bigend[0], b2.bigend[0]), - amrex::min(b1.bigend[1], b2.bigend[1]), - amrex::min(b1.bigend[2], b2.bigend[2])}; -#endif + return min(b1.bigEnd(), b2.bigEnd()); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Dim3 min_ubound (Box const& b1, Dim3 const& hi) noexcept +IntVectND min_ubound_iv (BoxND const& b1, IntVectND const& hi) noexcept { -#if (AMREX_SPACEDIM == 1) - return {amrex::min(b1.bigend[0], hi.x), - 0, 0}; -#elif (AMREX_SPACEDIM == 2) - return {amrex::min(b1.bigend[0], hi.x), - amrex::min(b1.bigend[1], hi.y), - 0}; -#elif (AMREX_SPACEDIM == 3) - return {amrex::min(b1.bigend[0], hi.x), - amrex::min(b1.bigend[1], hi.y), - amrex::min(b1.bigend[2], hi.z)}; -#endif + return min(b1.bigEnd(), hi); } +template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box minBox (Box const& b1, Box const& b2, IndexType typ) noexcept -{ -#if (AMREX_SPACEDIM == 1) - return Box(IntVect(amrex::max(b1.smallend[0], b2.smallend[0])), - IntVect(amrex::min(b1.bigend [0], b2.bigend [0])), - typ); -#elif (AMREX_SPACEDIM == 2) - return Box(IntVect(amrex::max(b1.smallend[0], b2.smallend[0]), - amrex::max(b1.smallend[1], b2.smallend[1])), - IntVect(amrex::min(b1.bigend [0], b2.bigend [0]), - amrex::min(b1.bigend [1], b2.bigend [1])), - typ); -#elif (AMREX_SPACEDIM == 3) - return Box(IntVect(amrex::max(b1.smallend[0], b2.smallend[0]), - amrex::max(b1.smallend[1], b2.smallend[1]), - amrex::max(b1.smallend[2], b2.smallend[2])), - IntVect(amrex::min(b1.bigend [0], b2.bigend [0]), - amrex::min(b1.bigend [1], b2.bigend [1]), - amrex::min(b1.bigend [2], b2.bigend [2])), - typ); -#endif +Dim3 lbound (BoxND const& box) noexcept +{ + return box.smallEnd().dim3(); +} + +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 ubound (BoxND const& box) noexcept +{ + return box.bigEnd().dim3(); +} + +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 begin (BoxND const& box) noexcept +{ + return box.smallEnd().dim3(); +} + +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 end (BoxND const& box) noexcept +{ + return (box.bigEnd() + 1).dim3(1); +} + +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 length (BoxND const& box) noexcept +{ + return (box.bigEnd() - box.smallEnd() + 1).dim3(1); } -// Returns a Box that covers all the argument Boxes in index +// Max of lower bound +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 max_lbound (BoxND const& b1, BoxND const& b2) noexcept +{ + return max(b1.smallEnd(), b2.smallEnd()).dim3(); +} + +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 max_lbound (BoxND const& b1, Dim3 const& lo) noexcept +{ + return max(b1.smallEnd(), IntVectND(lo)).dim3(); +} + +// Min of upper bound +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE 
+AMREX_FORCE_INLINE +Dim3 min_ubound (BoxND const& b1, BoxND const& b2) noexcept +{ + return min(b1.bigEnd(), b2.bigEnd()).dim3(); +} + +template = 0> +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Dim3 min_ubound (BoxND const& b1, Dim3 const& hi) noexcept +{ + return min(b1.bigEnd(), IntVectND(hi)).dim3(); +} + +// Returns a BoxND that covers all the argument Boxes in index // space. The types are ignored. Thus, the arguments can have -// different index types, and the returned Box's index type has no +// different index types, and the returned BoxND's index type has no // meaning. +template [[nodiscard]] AMREX_FORCE_INLINE -Box getIndexBounds (Box const& b1) noexcept +BoxND getIndexBounds (BoxND const& b1) noexcept { return b1; } +template [[nodiscard]] AMREX_FORCE_INLINE -Box getIndexBounds (Box const& b1, Box const& b2) noexcept +BoxND getIndexBounds (BoxND const& b1, BoxND const& b2) noexcept { - Box b = b1; + BoxND b = b1; b.setType(b2.ixType()); b.minBox(b2); return b; @@ -1792,16 +1965,17 @@ Box getIndexBounds (Box const& b1, Box const& b2) noexcept template [[nodiscard]] AMREX_FORCE_INLINE -Box getIndexBounds (T const& b1, T const& b2, Ts const& ... b3) noexcept +auto getIndexBounds (T const& b1, T const& b2, Ts const& ... b3) noexcept { return getIndexBounds(getIndexBounds(b1,b2),b3...); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -IntVect getCell (Box const* boxes, int nboxes, Long icell) noexcept +IntVectND getCell (BoxND const* boxes, int nboxes, Long icell) noexcept { int ibox; Long ncells_subtotal = 0; @@ -1817,117 +1991,113 @@ IntVect getCell (Box const* boxes, int nboxes, Long icell) noexcept return boxes[ibox].atOffset(offset); } +template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box makeSlab (Box const& b, int direction, int slab_index) noexcept +BoxND makeSlab (BoxND const& b, int direction, int slab_index) noexcept { - Box r = b; + BoxND r = b; r.makeSlab(direction,slab_index); return r; } +template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Box makeSingleCellBox (int i, int j, int k, IndexType typ = IndexType::TheCellType()) +BoxND makeSingleCellBox (int i, int j, int k, IndexTypeND typ = IndexTypeND::TheCellType()) { -#if (AMREX_SPACEDIM == 1) - amrex::ignore_unused(j,k); -#elif (AMREX_SPACEDIM == 2) - amrex::ignore_unused(k); -#endif - return Box(IntVect(AMREX_D_DECL(i,j,k)),IntVect(AMREX_D_DECL(i,j,k)),typ); + Dim3 p3d{i, j, k}; + IntVectND vect{p3d}; + return BoxND{vect, vect, typ}; } -struct BoxIndexer +template +[[nodiscard]] +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +BoxND makeSingleCellBox (IntVectND const& vect, IndexTypeND typ = IndexTypeND::TheCellType()) { - std::uint64_t npts; + return BoxND{vect, vect, typ}; +} -#if (AMREX_SPACEDIM == 3) - Math::FastDivmodU64 fdxy; - Math::FastDivmodU64 fdx; - IntVect lo; +template +struct BoxIndexerND +{ + std::uint64_t npts; + Math::FastDivmodU64 fdm[dim-1]; + IntVectND lo; - BoxIndexer (Box const& box) + BoxIndexerND (BoxND const& box) : npts(box.numPts()), - fdxy(std::uint64_t(box.length(0))*std::uint64_t(box.length(1))), - fdx (std::uint64_t(box.length(0))), lo (box.smallEnd()) - {} - - [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Dim3 operator() (std::uint64_t icell) const - { - std::uint64_t x, y, z, rem; - fdxy(z, rem, icell); - fdx(y, x, rem); - return {int(x)+lo[0], int(y)+lo[1], int(z)+lo[2]}; - } + { + std::uint64_t mult = 1; + for (int i=0; i intVect (std::uint64_t icell) const { - std::uint64_t x, y, z, rem; - fdxy(z, rem, 
icell); - fdx(y, x, rem); - return {int(x)+lo[0], int(y)+lo[1], int(z)+lo[2]}; - } + IntVectND retval = lo; -#elif (AMREX_SPACEDIM == 2) + for (int i=dim-1; i>0; --i) { + std::uint64_t quotient, remainder; + fdm[i-1](quotient, remainder, icell); + retval[i] += quotient; + icell = remainder; + } - Math::FastDivmodU64 fdx; - IntVect lo; + retval[0] += icell; - BoxIndexer (Box const& box) - : npts(box.numPts()), - fdx (std::uint64_t(box.length(0))), - lo (box.smallEnd()) - {} + return retval; + } + template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 operator() (std::uint64_t icell) const { - std::uint64_t x, y; - fdx(y, x, icell); - return {int(x)+lo[0], int(y)+lo[1], 0}; + return intVect(icell).dim3(); } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - IntVect intVect (std::uint64_t icell) const - { - std::uint64_t x, y; - fdx(y, x, icell); - return {int(x)+lo[0], int(y)+lo[1]}; - } + std::uint64_t numPts () const { return npts; } +}; -#elif (AMREX_SPACEDIM == 1) +template<> +struct BoxIndexerND<1> +{ + std::uint64_t npts; int lo; - BoxIndexer (Box const& box) + BoxIndexerND (BoxND<1> const& box) : npts(box.numPts()), lo(box.smallEnd(0)) {} [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Dim3 operator() (std::uint64_t icell) const + IntVectND<1> intVect (std::uint64_t icell) const { - return {int(icell)+lo, 0, 0}; + return IntVectND<1>{int(icell)+lo}; } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - IntVect intVect (std::uint64_t icell) const + Dim3 operator() (std::uint64_t icell) const { - return IntVect{int(icell)+lo}; + return {int(icell)+lo, 0, 0}; } -#endif - [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t numPts () const { return npts; } }; +using BoxIndexer = BoxIndexerND; + } #endif /*AMREX_BOX_H*/ diff --git a/Src/Base/AMReX_Box.cpp b/Src/Base/AMReX_Box.cpp index b0db9caa994..0f53bda5e99 100644 --- a/Src/Base/AMReX_Box.cpp +++ b/Src/Base/AMReX_Box.cpp @@ -10,19 +10,23 @@ namespace amrex { +namespace detail { + // // I/O functions. 
// std::ostream& -operator<< (std::ostream& os, - const Box& b) +box_write (std::ostream& os, + const int * smallend, + const int * bigend, + const int * type, + int dim) { - os << '(' - << b.smallEnd() << ' ' - << b.bigEnd() << ' ' - << b.type() - << ')'; + os << '('; + int_vector_write(os, smallend, dim) << ' '; + int_vector_write(os, bigend, dim) << ' '; + int_vector_write(os, type, dim) << ')'; if (os.fail()) { amrex::Error("operator<<(ostream&,Box&) failed"); @@ -37,37 +41,44 @@ operator<< (std::ostream& os, #define BL_IGNORE_MAX 100000 std::istream& -operator>> (std::istream& is, - Box& b) +box_read (std::istream& is, + int * smallend, + int * bigend, + int * type, + int dim) { - IntVect lo, hi, typ; - is >> std::ws; char c; is >> c; + for (int i=0; i> lo >> hi; + int_vector_read(is, smallend, dim); + int_vector_read(is, bigend, dim); is >> c; // Read an optional IndexType is.putback(c); if ( c == '(' ) { - is >> typ; + int_vector_read(is, type, dim); } is.ignore(BL_IGNORE_MAX,')'); } else if (c == '<') { is.putback(c); - is >> lo >> hi; + int_vector_read(is, smallend, dim); + int_vector_read(is, bigend, dim); is >> c; // Read an optional IndexType is.putback(c); if ( c == '<' ) { - is >> typ; + int_vector_read(is, type, dim); } //is.ignore(BL_IGNORE_MAX,'>'); } @@ -76,8 +87,6 @@ operator>> (std::istream& is, amrex::Error("operator>>(istream&,Box&): expected \'(\'"); } - b = Box(lo,hi,typ); - if (is.fail()) { amrex::Error("operator>>(istream&,Box&) failed"); } @@ -85,6 +94,8 @@ operator>> (std::istream& is, return is; } +} // namespace detail + BoxCommHelper::BoxCommHelper (const Box& bx, int* p_) : p(p_) { diff --git a/Src/Base/AMReX_BoxArray.H b/Src/Base/AMReX_BoxArray.H index 19cee3cefb3..b3b339c33bb 100644 --- a/Src/Base/AMReX_BoxArray.H +++ b/Src/Base/AMReX_BoxArray.H @@ -615,6 +615,11 @@ public: BoxArray& maxSize (const IntVect& block_size); + //! Forces each Box in BoxArray to have sizes >= min_size and <= + //! max_size. It's the caller's responsibility to make sure both the + //! BoxArray and max_size are coarsenable by min_size. + BoxArray& minmaxSize (const IntVect& min_size, const IntVect& max_size); + //! Refine each Box in the BoxArray to the specified ratio. BoxArray& refine (int refinement_ratio); diff --git a/Src/Base/AMReX_BoxArray.cpp b/Src/Base/AMReX_BoxArray.cpp index ecffd06d8a3..9bca5943526 100644 --- a/Src/Base/AMReX_BoxArray.cpp +++ b/Src/Base/AMReX_BoxArray.cpp @@ -555,12 +555,27 @@ BoxArray::maxSize (const IntVect& block_size) blst.maxSize(block_size); const int N = static_cast(blst.size()); if (size() != N) { // If size doesn't change, do nothing. - BoxList bak = (m_simplified_list) ? 
*m_simplified_list : BoxList(); + std::shared_ptr bak; + bak.swap(m_simplified_list); define(std::move(blst)); - if (bak.isNotEmpty()) { - m_simplified_list = std::make_shared(std::move(bak)); - } + m_simplified_list = std::move(bak); + } + return *this; +} + +BoxArray& +BoxArray::minmaxSize (const IntVect& min_size, const IntVect& max_size) +{ + AMREX_ASSERT(this->coarsenable(min_size) && + (max_size/min_size)*min_size == max_size); + std::shared_ptr bak; + if (m_bat.is_simple() && crseRatio() == IntVect::TheUnitVector()) { + bak.swap(m_simplified_list); } + this->coarsen(min_size); + this->maxSize(max_size/min_size); + this->refine(min_size); + m_simplified_list = std::move(bak); return *this; } diff --git a/Src/Base/AMReX_CArena.cpp b/Src/Base/AMReX_CArena.cpp index c47f8f5ed26..42987f47a86 100644 --- a/Src/Base/AMReX_CArena.cpp +++ b/Src/Base/AMReX_CArena.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #ifdef AMREX_TINY_PROFILING @@ -57,7 +58,11 @@ CArena::alloc_protected (std::size_t nbytes) } #endif - if (static_cast(m_used+nbytes) >= arena_info.release_threshold) { + if (static_cast(m_used+nbytes) >= arena_info.release_threshold +#ifdef AMREX_USE_GPU + && (MFIter::currentDepth() == 0) +#endif + ) { freeUnused_protected(); } @@ -393,7 +398,11 @@ CArena::hasFreeDeviceMemory (std::size_t sz) std::size_t nbytes = Arena::align(sz == 0 ? 1 : sz); - if (static_cast(m_used+nbytes) >= arena_info.release_threshold) { + if (static_cast(m_used+nbytes) >= arena_info.release_threshold +#ifdef AMREX_USE_GPU + && (MFIter::currentDepth() == 0) +#endif + ) { freeUnused_protected(); } diff --git a/Src/Base/AMReX_DistributionMapping.cpp b/Src/Base/AMReX_DistributionMapping.cpp index 958f7ef9ec6..cfe16b346a1 100644 --- a/Src/Base/AMReX_DistributionMapping.cpp +++ b/Src/Base/AMReX_DistributionMapping.cpp @@ -118,14 +118,15 @@ DistributionMapping::Initialize () ParmParse pp("DistributionMapping"); - pp.queryAdd("v" , verbose); - pp.queryAdd("verbose", verbose); - pp.queryAdd("efficiency", max_efficiency); - pp.queryAdd("sfc_threshold", sfc_threshold); - pp.queryAdd("node_size", node_size); - pp.queryAdd("verbose_mapper", flag_verbose_mapper); - - std::string theStrategy; + if (! 
pp.query("verbose", "v", verbose)) { + pp.add("verbose", verbose); + } + pp.query("efficiency", max_efficiency); + pp.query("sfc_threshold", sfc_threshold); + pp.query("node_size", node_size); + pp.query("verbose_mapper", flag_verbose_mapper); + + std::string theStrategy("SFC"); if (pp.query("strategy", theStrategy)) { diff --git a/Src/Base/AMReX_Enum.H b/Src/Base/AMReX_Enum.H new file mode 100644 index 00000000000..09583f5b73f --- /dev/null +++ b/Src/Base/AMReX_Enum.H @@ -0,0 +1,81 @@ +#ifndef AMREX_ENUM_H_ +#define AMREX_ENUM_H_ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +template +using amrex_enum_traits = decltype(amrex_get_enum_traits(std::declval())); + +namespace amrex { + template , + std::enable_if_t = 0> + T getEnum (std::string_view const& s) + { + auto pos = ET::enum_names.find(s); + if (pos == std::string_view::npos) { + std::string error_msg("amrex::getEnum: Unknown enum: "); + error_msg.append(s).append(" in AMREX_ENUM(").append(ET::class_name) + .append(", ").append(ET::enum_names).append(")."); + throw std::runtime_error(error_msg); + } + auto count = std::count(ET::enum_names.begin(), + ET::enum_names.begin()+pos, ','); + return static_cast(count); + } + + template , + std::enable_if_t = 0> + std::string getEnumNameString (T const& v) + { + auto n = static_cast(v); + std::size_t pos = 0; + for (int i = 0; i < n; ++i) { + pos = ET::enum_names.find(',', pos); + if (pos == std::string::npos) { + std::string error_msg("amrex::getEnum: Unknown enum value: "); + error_msg.append(std::to_string(n)).append(" in AMREX_ENUM(") + .append(ET::class_name).append(", ").append(ET::enum_names) + .append(")."); + throw std::runtime_error(error_msg); + } + ++pos; + } + auto pos2 = ET::enum_names.find(',', pos); + return amrex::trim(std::string(ET::enum_names.substr(pos,pos2-pos))); + } + + template , + std::enable_if_t = 0> + std::vector getEnumNameStrings () + { + return amrex::split(std::string(ET::enum_names), ", "); + } + + template , + std::enable_if_t = 0> + std::string getEnumClassName () + { + return std::string(ET::class_name); + } +} + +#define AMREX_ENUM(CLASS, ...) \ + enum class CLASS : int { __VA_ARGS__ }; \ + struct CLASS##_EnumTraits { \ + using enum_class_t = CLASS; \ + static constexpr bool value = true; \ + static constexpr std::string_view class_name{#CLASS}; \ + static constexpr std::string_view enum_names{#__VA_ARGS__}; \ + }; \ + CLASS##_EnumTraits amrex_get_enum_traits(CLASS) + +#endif diff --git a/Src/Base/AMReX_FArrayBox.cpp b/Src/Base/AMReX_FArrayBox.cpp index ecb7fc0f4fc..3341c3da5dd 100644 --- a/Src/Base/AMReX_FArrayBox.cpp +++ b/Src/Base/AMReX_FArrayBox.cpp @@ -24,10 +24,8 @@ bool FArrayBox::initialized = false; #if defined(AMREX_DEBUG) || defined(AMREX_TESTING) bool FArrayBox::do_initval = true; -bool FArrayBox::init_snan = true; #else bool FArrayBox::do_initval = false; -bool FArrayBox::init_snan = false; #endif Real FArrayBox::initval; @@ -144,6 +142,9 @@ FArrayBox::FArrayBox (const Box& b, int ncomp, Real const* p) noexcept void FArrayBox::initVal () noexcept { + // If amrex::InitSNaN is true, snans have been filled by BaseFab. 
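Illustrative sketch, not part of the patch: the new AMReX_Enum.H added above couples an enum class to its string table so values can be converted to and from their names (handy for ParmParse-style inputs). A minimal usage sketch, with the template brackets that this rendering stripped from the getEnum signatures restored as an assumption:

    #include <AMReX_Enum.H>
    #include <iostream>

    // Declares `enum class Flavor : int` plus the Flavor_EnumTraits type that
    // amrex::getEnum / getEnumNameString find through amrex_get_enum_traits().
    AMREX_ENUM(Flavor, vanilla, chocolate, strawberry);

    int main ()
    {
        Flavor f = amrex::getEnum<Flavor>("chocolate");      // string -> enum value
        std::cout << amrex::getEnumNameString(f) << '\n';    // prints "chocolate"
        for (auto const& name : amrex::getEnumNameStrings<Flavor>()) {
            std::cout << name << ' ';                        // vanilla chocolate strawberry
        }
        std::cout << '\n';
        return 0;
    }
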
+ if (amrex::InitSNaN()) { return; } + Real * p = dataPtr(); Long s = size(); if (p && s > 0) { @@ -158,22 +159,7 @@ FArrayBox::initVal () noexcept runon = RunOn::Cpu; #endif - if (init_snan) { -#if defined(AMREX_USE_GPU) - if (runon == RunOn::Gpu) - { - amrex::ParallelFor(s, [=] AMREX_GPU_DEVICE (Long i) noexcept - { - p[i] = std::numeric_limits::signaling_NaN(); - }); - Gpu::streamSynchronize(); - } - else -#endif - { - amrex_array_init_snan(p, s); - } - } else if (do_initval) { + if (do_initval) { const Real x = initval; AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG (runon, s, i, { @@ -435,9 +421,8 @@ FArrayBox::Initialize () ? std::numeric_limits::quiet_NaN() : std::numeric_limits::max(); - pp.queryAdd("initval", initval); - pp.queryAdd("do_initval", do_initval); - pp.queryAdd("init_snan", init_snan); + pp.query("initval", initval); + pp.query("do_initval", do_initval); amrex::ExecOnFinalize(FArrayBox::Finalize); } diff --git a/Src/Base/AMReX_FEIntegrator.H b/Src/Base/AMReX_FEIntegrator.H index f8a002ef534..0e89ffb9d5a 100644 --- a/Src/Base/AMReX_FEIntegrator.H +++ b/Src/Base/AMReX_FEIntegrator.H @@ -53,9 +53,6 @@ public: // So we initialize S_new by copying the old state. IntegratorOps::Copy(S_new, S_old); - // Call the pre RHS hook - BaseT::pre_rhs_action(S_new, time); - // F = RHS(S, t) T& F = *F_nodes[0]; BaseT::Rhs(F, S_new, time); diff --git a/Src/Base/AMReX_FabArrayBase.cpp b/Src/Base/AMReX_FabArrayBase.cpp index d8820e97e6f..9acf440d98f 100644 --- a/Src/Base/AMReX_FabArrayBase.cpp +++ b/Src/Base/AMReX_FabArrayBase.cpp @@ -27,10 +27,7 @@ namespace amrex { -// -// Set default values in Initialize()!!! -// -int FabArrayBase::MaxComp; +int FabArrayBase::MaxComp = 25; #if defined(AMREX_USE_GPU) @@ -99,11 +96,6 @@ FabArrayBase::Initialize () if (initialized) { return; } initialized = true; - // - // Set default values here!!! 
- // - FabArrayBase::MaxComp = 25; - ParmParse pp("fabarray"); Vector tilesize(AMREX_SPACEDIM); @@ -112,13 +104,25 @@ FabArrayBase::Initialize () { for (int i=0; i{AMREX_D_DECL(FabArrayBase::mfiter_tile_size[0], + FabArrayBase::mfiter_tile_size[1], + FabArrayBase::mfiter_tile_size[2])}); + } if (pp.queryarr("comm_tile_size", tilesize, 0, AMREX_SPACEDIM)) { for (int i=0; i{AMREX_D_DECL(FabArrayBase::comm_tile_size[0], + FabArrayBase::comm_tile_size[1], + FabArrayBase::comm_tile_size[2])}); + } - pp.queryAdd("maxcomp", FabArrayBase::MaxComp); + pp.query("maxcomp", FabArrayBase::MaxComp); if (MaxComp < 1) { MaxComp = 1; @@ -341,7 +345,7 @@ FabArrayBase::CPC::define (const BoxArray& ba_dst, const DistributionMapping& dm std::vector< std::pair > isects; - const std::vector& pshifts = m_period.shiftIntVect(); + const std::vector& pshifts = m_period.shiftIntVect(ng_dst); auto& send_tags = *m_SndTags; @@ -668,7 +672,7 @@ FabArrayBase::define_fb_metadata (CommMetaData& cmd, const IntVect& nghost, const IntVect ng_ng =nghost - 1; std::vector< std::pair > isects; - const std::vector& pshifts = period.shiftIntVect(); + const std::vector& pshifts = period.shiftIntVect(nghost); auto& send_tags = *cmd.m_SndTags; @@ -897,7 +901,7 @@ FabArrayBase::FB::define_epo (const FabArrayBase& fa) const IndexType& typ = ba.ixType(); std::vector< std::pair > isects; - const std::vector& pshifts = m_period.shiftIntVect(); + const std::vector& pshifts = m_period.shiftIntVect(ng); auto& send_tags = *m_SndTags; @@ -1049,7 +1053,7 @@ void FabArrayBase::FB::tag_one_box (int krcv, BoxArray const& ba, DistributionMa std::vector > isects2; std::vector > isects3; - auto const& pshifts = m_period.shiftIntVect(); + auto const& pshifts = m_period.shiftIntVect(m_ngrow); for (auto const& shft: pshifts) { ba.intersections(gbx+shft, isects2); for (auto const& is2 : isects2) { @@ -1140,7 +1144,7 @@ FabArrayBase::FB::define_os (const FabArrayBase& fa) #ifdef AMREX_USE_MPI if (ParallelDescriptor::NProcs() > 1) { - const std::vector& pshifts = m_period.shiftIntVect(); + const std::vector& pshifts = m_period.shiftIntVect(m_ngrow); std::vector< std::pair > isects; std::set my_receiver; diff --git a/Src/Base/AMReX_ForkJoin.cpp b/Src/Base/AMReX_ForkJoin.cpp index f0cc03ae04b..ce51203ab35 100644 --- a/Src/Base/AMReX_ForkJoin.cpp +++ b/Src/Base/AMReX_ForkJoin.cpp @@ -68,8 +68,8 @@ ForkJoin::ForkJoin (const Vector &task_rank_pct) void ForkJoin::init(const Vector &task_rank_n) { - ParmParse pp("forkjoin"); - pp.queryAdd("verbose", flag_verbose); + ParmParse pp("amrex.forkjoin"); + pp.query("verbose", flag_verbose); const auto task_n = task_rank_n.size(); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(task_n > 0, diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 01759e1b502..6231fbd1f9b 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -425,6 +425,14 @@ public: */ void computeRoundoffDomain (); + //! Returns roundoff domain's lower end + [[nodiscard]] GpuArray const& + RoundOffLo () const { return roundoff_lo; } + + //! 
Returns roundoff domain's higher end + [[nodiscard]] GpuArray const& + RoundOffHi () const { return roundoff_hi; } + private: void read_params (); @@ -440,6 +448,8 @@ private: // Box domain; + + friend std::istream& operator>> (std::istream&, Geometry&); }; diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 2ec216ae109..4b63f29fdd8 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -15,22 +15,18 @@ namespace amrex { std::ostream& -operator<< (std::ostream& os, - const Geometry& g) +operator<< (std::ostream& os, const Geometry& g) { os << (CoordSys&) g << g.ProbDomain() << g.Domain() << 'P' << IntVect(g.isPeriodic()); return os; } std::istream& -operator>> (std::istream& is, - Geometry& g) +operator>> (std::istream& is, Geometry& g) { - Box bx; - RealBox rb; - is >> (CoordSys&) g >> rb >> bx; - g.Domain(bx); - g.ProbDomain(rb); + is >> (CoordSys&) g >> g.prob_domain >> g.domain; + + g.computeRoundoffDomain(); int ic = is.peek(); if (ic == static_cast('P')) { diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H index 7c17c918a70..a7aef5a9243 100644 --- a/Src/Base/AMReX_GpuDevice.H +++ b/Src/Base/AMReX_GpuDevice.H @@ -14,6 +14,7 @@ #include #include #include +#include #include #define AMREX_GPU_MAX_STREAMS 8 @@ -318,6 +319,52 @@ dtod_memcpy (void* p_d_dst, const void* p_d_src, const std::size_t sz) noexcept void hypreSynchronize (); #endif +//! Copy `nbytes` bytes from host to device global variable. `offset` is the +//! offset in bytes from the start of the device global variable. +template +void memcpy_from_host_to_device_global_async (T& dg, const void* src, + std::size_t nbytes, + std::size_t offset = 0) +{ +#if defined(AMREX_USE_CUDA) + AMREX_CUDA_SAFE_CALL(cudaMemcpyToSymbolAsync(dg, src, nbytes, offset, + cudaMemcpyHostToDevice, + Device::gpuStream())); +#elif defined(AMREX_USE_HIP) + AMREX_HIP_SAFE_CALL(hipMemcpyToSymbolAsync(dg, src, nbytes, offset, + hipMemcpyHostToDevice, + Device::gpuStream())); +#elif defined(AMREX_USE_SYCL) + Device::streamQueue().memcpy(dg, src, nbytes, offset); +#else + auto* p = (char*)(&dg); + std::memcpy(p+offset, src, nbytes); +#endif +} + +//! Copy `nbytes` bytes from device global variable to host. `offset` is the +//! offset in bytes from the start of the device global variable. +template +void memcpy_from_device_global_to_host_async (void* dst, T const& dg, + std::size_t nbytes, + std::size_t offset = 0) +{ +#if defined(AMREX_USE_CUDA) + AMREX_CUDA_SAFE_CALL(cudaMemcpyFromSymbolAsync(dst, dg, nbytes, offset, + cudaMemcpyDeviceToHost, + Device::gpuStream())); +#elif defined(AMREX_USE_HIP) + AMREX_HIP_SAFE_CALL(hipMemcpyFromSymbolAsync(dst, dg, nbytes, offset, + hipMemcpyDeviceToHost, + Device::gpuStream())); +#elif defined(AMREX_USE_SYCL) + Device::streamQueue().memcpy(dst, dg, nbytes, offset); +#else + auto const* p = (char const*)(&dg); + std::memcpy(dst, p+offset, nbytes); +#endif +} + } #endif diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index fa190a22c1c..d911349a61f 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -19,7 +19,11 @@ #if defined(AMREX_USE_CUDA) #include #if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING) -#include +#if __has_include() +# include +#else +# include +#endif #endif #endif @@ -155,9 +159,9 @@ Device::Initialize () max_gpu_streams = std::max(max_gpu_streams, 1); ParmParse pp("device"); - - pp.queryAdd("v", verbose); - pp.queryAdd("verbose", verbose); + if (! 
pp.query("verbose", "v", verbose)) { + pp.add("verbose", verbose); + } if (amrex::Verbose()) { AMREX_HIP_OR_CUDA_OR_SYCL @@ -519,9 +523,9 @@ Device::initialize_gpu () int ny = 0; int nz = 0; - pp.queryAdd("numThreads.x", nx); - pp.queryAdd("numThreads.y", ny); - pp.queryAdd("numThreads.z", nz); + pp.query("numThreads.x", nx); + pp.query("numThreads.y", ny); + pp.query("numThreads.z", nz); numThreadsOverride.x = (int) nx; numThreadsOverride.y = (int) ny; @@ -531,9 +535,9 @@ Device::initialize_gpu () ny = 0; nz = 0; - pp.queryAdd("numBlocks.x", nx); - pp.queryAdd("numBlocks.y", ny); - pp.queryAdd("numBlocks.z", nz); + pp.query("numBlocks.x", nx); + pp.query("numBlocks.y", ny); + pp.query("numBlocks.z", nz); numBlocksOverride.x = (int) nx; numBlocksOverride.y = (int) ny; @@ -542,8 +546,8 @@ Device::initialize_gpu () // Graph initialization int graph_init = 0; int graph_size = 10000; - pp.queryAdd("graph_init", graph_init); - pp.queryAdd("graph_init_nodes", graph_size); + pp.query("graph_init", graph_init); + pp.query("graph_init_nodes", graph_size); if (graph_init) { diff --git a/Src/Base/AMReX_GpuLaunch.nolint.H b/Src/Base/AMReX_GpuLaunch.nolint.H index c7df1737517..bb1bbb2453b 100644 --- a/Src/Base/AMReX_GpuLaunch.nolint.H +++ b/Src/Base/AMReX_GpuLaunch.nolint.H @@ -1,9 +1,8 @@ // Do not include this header anywhere other than AMReX_GpuLaunch.H. // The purpose of this file is to avoid clang-tidy. -#define AMREX_WRONG_NUM_ARGS(...) static_assert(false,"Wrong number of arguments to macro") -#define AMREX_GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME -#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_MACRO(__VA_ARGS__,\ +#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME +#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\ AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \ AMREX_WRONG_NUM_ARGS, \ AMREX_WRONG_NUM_ARGS, \ @@ -14,7 +13,7 @@ AMREX_WRONG_NUM_ARGS, \ AMREX_WRONG_NUM_ARGS)(__VA_ARGS__) -#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_MACRO(__VA_ARGS__,\ +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) 
AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\ AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \ AMREX_WRONG_NUM_ARGS, \ AMREX_WRONG_NUM_ARGS, \ diff --git a/Src/Base/AMReX_GpuLaunchFunctsC.H b/Src/Base/AMReX_GpuLaunchFunctsC.H index 1850ff6ff96..927b729f7ed 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsC.H +++ b/Src/Base/AMReX_GpuLaunchFunctsC.H @@ -5,53 +5,115 @@ namespace amrex { namespace detail { + + // call_f_scalar_handler + template - AMREX_GPU_DEVICE - auto call_f (F const& f, N i) + AMREX_FORCE_INLINE + auto call_f_scalar_handler (F const& f, N i) noexcept -> decltype(f(0)) { f(i); } template - AMREX_GPU_DEVICE - auto call_f (F const& f, N i) + AMREX_FORCE_INLINE + auto call_f_scalar_handler (F const& f, N i) noexcept -> decltype(f(0,Gpu::Handler{})) { - f(i,Gpu::Handler{}); + f(i, Gpu::Handler{}); + } + + // call_f_intvect_inner + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND<1> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], 0, 0, args...); + } + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND<2> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], iv[1], 0, args...); + } + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv, args...)) + { + f(iv, args...); + } + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv[Ns]..., args...)) + { + f(iv[Ns]..., args...); + } + + // call_f_intvect_engine + + template + AMREX_FORCE_INLINE + auto call_f_intvect_engine (F const& f, IntVectND iv, RandomEngine engine) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, engine)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv, engine); + } + + // call_f_intvect_handler + + template + AMREX_FORCE_INLINE + auto call_f_intvect_handler (F const& f, IntVectND iv) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv); } - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k) - noexcept -> decltype(f(0,0,0)) + template + AMREX_FORCE_INLINE + auto call_f_intvect_handler (F const& f, IntVectND iv) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, Gpu::Handler{})) { - f(i,j,k); + call_f_intvect_inner(std::make_index_sequence(), f, iv, Gpu::Handler{}); } - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k) - noexcept -> decltype(f(0,0,0,Gpu::Handler{})) + // call_f_intvect_ncomp_engine + + template + AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_engine (F const& f, IntVectND iv, T n, RandomEngine engine) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, n, engine)) { - f(i,j,k,Gpu::Handler{}); + call_f_intvect_inner(std::make_index_sequence(), f, iv, n, engine); } - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, T n) - noexcept -> decltype(f(0,0,0,0)) + // call_f_intvect_ncomp_handler + + template + AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_handler (F const& f, IntVectND iv, T n) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, n)) { - f(i,j,k,n); + call_f_intvect_inner(std::make_index_sequence(), f, iv, n); } - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int 
j, int k, T n) - noexcept -> decltype(f(0,0,0,0,Gpu::Handler{})) + template + AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_handler (F const& f, IntVectND iv, T n) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, n, Gpu::Handler{})) { - f(i,j,k,n,Gpu::Handler{}); + call_f_intvect_inner(std::make_index_sequence(), f, iv, n, Gpu::Handler{}); } + } template @@ -72,7 +134,7 @@ AMREX_ATTRIBUTE_FLATTEN_FOR void For (T n, L const& f) noexcept { for (T i = 0; i < n; ++i) { - detail::call_f(f,i); + detail::call_f_scalar_handler(f,i); } } @@ -102,7 +164,7 @@ void ParallelFor (T n, L const& f) noexcept { AMREX_PRAGMA_SIMD for (T i = 0; i < n; ++i) { - detail::call_f(f,i); + detail::call_f_scalar_handler(f,i); } } @@ -126,182 +188,294 @@ void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept ParallelFor(n, std::forward(f)); } -template +namespace detail { + +template +AMREX_FORCE_INLINE +void For_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_handler(f,iv); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_handler(f,iv); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_handler(f,iv); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + For_impND(f, lo, hi, iv); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void For (Box const& box, L const& f) noexcept +void For (BoxND const& box, L const& f) noexcept { - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - detail::call_f(f,i,j,k); - }}} + const auto lo = amrex::lbound_iv(box); + const auto hi = amrex::ubound_iv(box); + IntVectND iv; + detail::For_impND(f, lo, hi, iv); } -template -void For (Box const& box, L&& f) noexcept +template +void For (BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); For(box, std::forward(f)); } -template -void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void For (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { For(box, std::forward(f)); } -template -void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void For (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); For(box, std::forward(f)); } -template +namespace detail { + +template +AMREX_FORCE_INLINE +void ParallelFor_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_handler(f,iv); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_handler(f,iv); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + 
for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_handler(f,iv); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + ParallelFor_impND(f, lo, hi, iv); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void ParallelFor (Box const& box, L const& f) noexcept +void ParallelFor (BoxND const& box, L const& f) noexcept { - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD - for (int i = lo.x; i <= hi.x; ++i) { - detail::call_f(f,i,j,k); - }}} + const auto lo = amrex::lbound_iv(box); + const auto hi = amrex::ubound_iv(box); + IntVectND iv; + detail::ParallelFor_impND(f, lo, hi, iv); } -template -void ParallelFor (Box const& box, L&& f) noexcept +template +void ParallelFor (BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box, std::forward(f)); } -template -void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { ParallelFor(box, std::forward(f)); } -template -void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box, std::forward(f)); } -template > > +namespace detail { + +template +AMREX_FORCE_INLINE +void For_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, T n) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_handler(f,iv,n); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_handler(f,iv,n); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_handler(f,iv,n); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + For_impND(f, lo, hi, iv, n); + } + } +} + +} + +template > > AMREX_ATTRIBUTE_FLATTEN_FOR -void For (Box const& box, T ncomp, L const& f) noexcept +void For (BoxND const& box, T ncomp, L const& f) noexcept { - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); + const auto lo = amrex::lbound_iv(box); + const auto hi = amrex::ubound_iv(box); + IntVectND iv; for (T n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - detail::call_f(f,i,j,k,n); - }}} + detail::For_impND(f, lo, hi, iv, n); } } -template > > -void For (Box const& box, T ncomp, L&& f) noexcept +template > > +void For (BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); For(box, ncomp, std::forward(f)); } -template > > -void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void For (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { For(box, ncomp, std::forward(f)); } -template > > -void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void For (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); 
For(box, ncomp, std::forward(f)); } -template > > -AMREX_ATTRIBUTE_FLATTEN_FOR -void ParallelFor (Box const& box, T ncomp, L const& f) noexcept +namespace detail { + +template +AMREX_FORCE_INLINE +void ParallelFor_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, T n) noexcept { - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (T n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { + if constexpr (idim == 1) { AMREX_PRAGMA_SIMD - for (int i = lo.x; i <= hi.x; ++i) { - detail::call_f(f,i,j,k,n); + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_handler(f,iv,n); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_handler(f,iv,n); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_handler(f,iv,n); }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + ParallelFor_impND(f, lo, hi, iv, n); + } } } -template > > -void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +} + +template > > +AMREX_ATTRIBUTE_FLATTEN_FOR +void ParallelFor (BoxND const& box, T ncomp, L const& f) noexcept +{ + const auto lo = amrex::lbound_iv(box); + const auto hi = amrex::ubound_iv(box); + IntVectND iv; + for (T n = 0; n < ncomp; ++n) { + detail::ParallelFor_impND(f, lo, hi, iv, n); + } +} + +template > > +void ParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box, ncomp, std::forward(f)); } -template > > -void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(box, ncomp, std::forward(f)); } -template > > -void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box, ncomp, std::forward(f)); } -template -void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void For (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { For(box1, std::forward(f1)); For(box2, std::forward(f2)); } -template -void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void For (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1, std::forward(f1)); For(box2, std::forward(f2)); } -template -void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void For (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { For (box1, box2, std::forward(f1), std::forward(f2)); } -template -void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void For (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); For (box1, box2, std::forward(f1), std::forward(f2)); } -template -void For (Box const& box1, Box const& box2, Box const& box3, 
L1&& f1, L2&& f2, L3&& f3) noexcept +template +void For (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1, std::forward(f1)); For(box2, std::forward(f2)); For(box3, std::forward(f3)); } -template -void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void For (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1, std::forward(f1)); @@ -309,81 +483,81 @@ void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L For(box3, std::forward(f3)); } -template -void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void For (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); } -template -void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void For (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); } -template >, typename M2=std::enable_if_t> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { For(box1, ncomp1, std::forward(f1)); For(box2, ncomp2, std::forward(f2)); } -template >, typename M2=std::enable_if_t> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1, ncomp1, std::forward(f1)); For(box2, ncomp2, std::forward(f2)); } -template >, typename M2=std::enable_if_t> > void For (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t> > void For (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { For(box1, ncomp1, std::forward(f1)); For(box2, ncomp2, std::forward(f2)); For(box3, ncomp3, std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { 
amrex::ignore_unused(MT); For(box1, ncomp1, std::forward(f1)); @@ -391,28 +565,28 @@ void For (Box const& box1, T1 ncomp1, L1&& f1, For(box3, ncomp3, std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void For (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { For(box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void For (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1,ncomp1,std::forward(f1), @@ -420,44 +594,44 @@ void For (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } -template -void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1, std::forward(f1)); ParallelFor(box2, std::forward(f2)); } -template -void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1, std::forward(f1)); ParallelFor(box2, std::forward(f2)); } -template -void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } -template -void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } -template -void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void ParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1, std::forward(f1)); ParallelFor(box2, std::forward(f2)); ParallelFor(box3, std::forward(f3)); } -template -void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void ParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1, std::forward(f1)); @@ -465,83 +639,83 @@ void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2 ParallelFor(box3, std::forward(f3)); } -template -void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template -void ParallelFor (Gpu::KernelInfo const&, Box const& 
box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +template +void ParallelFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template >, typename M2=std::enable_if_t> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(box1, ncomp1, std::forward(f1)); ParallelFor(box2, ncomp2, std::forward(f2)); } -template >, typename M2=std::enable_if_t> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1, ncomp1, std::forward(f1)); ParallelFor(box2, ncomp2, std::forward(f2)); } -template >, typename M2=std::enable_if_t> > void ParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t> > void ParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(box1, ncomp1, std::forward(f1)); ParallelFor(box2, ncomp2, std::forward(f2)); ParallelFor(box3, ncomp3, std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1, ncomp1, std::forward(f1)); @@ -549,28 +723,28 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box3, ncomp3, std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void ParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(box1, ncomp1, std::forward(f1), box2, ncomp2, std::forward(f2), box3, ncomp3, std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void ParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 
ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1, ncomp1, std::forward(f1), @@ -591,99 +765,99 @@ void HostDeviceParallelFor (T n, L&& f) noexcept ParallelFor(n,std::forward(f)); } -template -void HostDeviceParallelFor (Box const& box, L&& f) noexcept +template +void HostDeviceParallelFor (BoxND const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } -template -void HostDeviceParallelFor (Box const& box, L&& f) noexcept +template +void HostDeviceParallelFor (BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box,std::forward(f)); } -template > > -void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } -template > > -void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box,ncomp,std::forward(f)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template >, typename M2=std::enable_if_t> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(box1,ncomp1,std::forward(f1), 
box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,ncomp1,std::forward(f1), @@ -704,99 +878,99 @@ void HostDeviceFor (T n, L&& f) noexcept For(n,std::forward(f)); } -template -void HostDeviceFor (Box const& box, L&& f) noexcept +template +void HostDeviceFor (BoxND const& box, L&& f) noexcept { For(box,std::forward(f)); } -template -void HostDeviceFor (Box const& box, L&& f) noexcept +template +void HostDeviceFor (BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); For(box,std::forward(f)); } -template > > -void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceFor (BoxND const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } -template > > -void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceFor (BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); For(box,ncomp,std::forward(f)); } -template -void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, +template +void HostDeviceFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template -void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, +template +void HostDeviceFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template >, typename M2=std::enable_if_t> > -void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void HostDeviceFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t> > -void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void HostDeviceFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void HostDeviceFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { For(box1,ncomp1,std::forward(f1), 
box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > -void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void HostDeviceFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1,ncomp1,std::forward(f1), @@ -817,105 +991,105 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept ParallelFor(n,std::forward(f)); } -template -void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } -template -void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box,std::forward(f)); } -template > > -void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceParallelFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } -template > > -void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceParallelFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); ParallelFor(box,ncomp,std::forward(f)); } -template -void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } -template +template void HostDeviceParallelFor (Gpu::KernelInfo const&, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template +template void HostDeviceParallelFor (Gpu::KernelInfo const&, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template >, typename M2=std::enable_if_t> > void HostDeviceParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t> > void HostDeviceParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 
ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void HostDeviceParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void HostDeviceParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); ParallelFor(box1,ncomp1,std::forward(f1), @@ -936,105 +1110,105 @@ void HostDeviceFor (Gpu::KernelInfo const&, T n, L&& f) noexcept For(n,std::forward(f)); } -template -void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void HostDeviceFor (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { For(box,std::forward(f)); } -template -void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +template +void HostDeviceFor (Gpu::KernelInfo const&, BoxND const& box, L&& f) noexcept { amrex::ignore_unused(MT); For(box,std::forward(f)); } -template > > -void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } -template > > -void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +template > > +void HostDeviceFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L&& f) noexcept { amrex::ignore_unused(MT); For(box,ncomp,std::forward(f)); } -template -void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceFor (Gpu::KernelInfo const&, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1,box2,std::forward(f1),std::forward(f2)); } -template +template void HostDeviceFor (Gpu::KernelInfo const&, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template +template void HostDeviceFor (Gpu::KernelInfo const&, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template >, typename M2=std::enable_if_t> > void HostDeviceFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& 
f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t> > void HostDeviceFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { amrex::ignore_unused(MT); For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void HostDeviceFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { For(box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } -template >, typename M2=std::enable_if_t>, typename M3=std::enable_if_t> > void HostDeviceFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { amrex::ignore_unused(MT); For(box1,ncomp1,std::forward(f1), @@ -1051,31 +1225,81 @@ void ParallelForRNG (T n, L const& f) noexcept } } -template +namespace detail { + +template +AMREX_FORCE_INLINE +void ParallelForRNG_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_engine(f,iv,RandomEngine{}); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_engine(f,iv,RandomEngine{}); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_engine(f,iv,RandomEngine{}); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + ParallelForRNG_impND(f, lo, hi, iv); + } + } +} + +template +AMREX_FORCE_INLINE +void ParallelForRNG_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, T n) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_engine(f,iv,n,RandomEngine{}); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_engine(f,iv,n,RandomEngine{}); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_engine(f,iv,n,RandomEngine{}); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + ParallelForRNG_impND(f, lo, hi, iv, n); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void ParallelForRNG (Box const& box, L const& f) noexcept +void ParallelForRNG (BoxND const& box, L const& f) noexcept { - 
const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k,RandomEngine{}); - }}} + const auto lo = amrex::lbound_iv(box); + const auto hi = amrex::ubound_iv(box); + IntVectND iv; + detail::ParallelForRNG_impND(f, lo, hi, iv); } -template > > +template > > AMREX_ATTRIBUTE_FLATTEN_FOR -void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept +void ParallelForRNG (BoxND const& box, T ncomp, L const& f) noexcept { - const auto lo = amrex::lbound(box); - const auto hi = amrex::ubound(box); + const auto lo = amrex::lbound_iv(box); + const auto hi = amrex::ubound_iv(box); + IntVectND iv; for (T n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k,n,RandomEngine{}); - }}} + detail::ParallelForRNG_impND(f, lo, hi, iv, n); } } diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H index dbc36e47383..7955410f8ba 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsG.H +++ b/Src/Base/AMReX_GpuLaunchFunctsG.H @@ -4,6 +4,146 @@ namespace amrex { +namespace detail { + + // call_f_scalar_handler + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_scalar_handler (F const& f, N i, Gpu::Handler const&) + noexcept -> decltype(f(0)) + { + f(i); + } + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_scalar_handler (F const& f, N i, Gpu::Handler const& handler) + noexcept -> decltype(f(0,Gpu::Handler{})) + { + f(i, handler); + } + + // call_f_intvect_inner + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND<1> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], 0, 0, args...); + } + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND<2> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], iv[1], 0, args...); + } + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv, args...)) + { + f(iv, args...); + } + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv[Ns]..., args...)) + { + f(iv[Ns]..., args...); + } + + // call_f_intvect + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect (F const& f, IntVectND iv) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv); + } + + // call_f_intvect_engine + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_engine (F const& f, IntVectND iv, RandomEngine engine) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, engine)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv, engine); + } + + // call_f_intvect_handler + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_handler (F const& f, IntVectND iv, Gpu::Handler const&) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv); + } + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_handler (F const& f, IntVectND iv, Gpu::Handler const& 
handler) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, Gpu::Handler{})) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv, handler); + } + + // call_f_intvect_ncomp + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_ncomp (F const& f, IntVectND iv, T ncomp) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, 0)) + { + for (T n = 0; n < ncomp; ++n) { + call_f_intvect_inner(std::make_index_sequence(), f, iv, n); + } + } + + // call_f_intvect_ncomp_engine + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_engine (F const& f, IntVectND iv, T ncomp, RandomEngine engine) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, 0, engine)) + { + for (T n = 0; n < ncomp; ++n) { + call_f_intvect_inner(std::make_index_sequence(), f, iv, n, engine); + } + } + + // call_f_intvect_ncomp_handler + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_handler (F const& f, IntVectND iv, T ncomp, Gpu::Handler const&) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, 0)) + { + for (T n = 0; n < ncomp; ++n) { + call_f_intvect_inner(std::make_index_sequence(), f, iv, n); + } + } + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_handler (F const& f, IntVectND iv, T ncomp, Gpu::Handler const& handler) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, 0, Gpu::Handler{})) + { + for (T n = 0; n < ncomp; ++n) { + call_f_intvect_inner(std::make_index_sequence(), f, iv, n, handler); + } + } + +} + #ifdef AMREX_USE_SYCL template @@ -137,56 +277,6 @@ void launch (T const& n, L const& f) noexcept } } -namespace detail { - template - AMREX_GPU_DEVICE - auto call_f (F const& f, N i, Gpu::Handler const&) - noexcept -> decltype(f(0)) - { - f(i); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, N i, Gpu::Handler const& handler) - noexcept -> decltype(f(0,Gpu::Handler{})) - { - f(i,handler); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, Gpu::Handler const&) - noexcept -> decltype(f(0,0,0)) - { - f(i,j,k); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, Gpu::Handler const& handler) - noexcept -> decltype(f(0,0,0,Gpu::Handler{})) - { - f(i,j,k,handler); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, T ncomp, Gpu::Handler const&) - noexcept -> decltype(f(0,0,0,0)) - { - for (T n = 0; n < ncomp; ++n) f(i,j,k,n); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, T ncomp, Gpu::Handler const& handler) - noexcept -> decltype(f(0,0,0,0,Gpu::Handler{})) - { - for (T n = 0; n < ncomp; ++n) f(i,j,k,n,handler); - } -} - template ::value> > void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept { @@ -210,7 +300,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept i < std::size_t(n); i += stride) { int n_active_threads = amrex::min(std::size_t(n)-i+item.get_local_id(0), item.get_local_range(0)); - detail::call_f(f, T(i), Gpu::Handler{&item, shared_data.get_multi_ptr().get(), + detail::call_f_scalar_handler(f, T(i), Gpu::Handler{&item, shared_data.get_multi_ptr().get(), n_active_threads}); } }); @@ -225,7 +315,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept { for (std::size_t i = item.get_global_id(0), stride = item.get_global_range(0); i < std::size_t(n); i += 
stride) { - detail::call_f(f, T(i), Gpu::Handler{&item}); + detail::call_f_scalar_handler(f, T(i), Gpu::Handler{&item}); } }); }); @@ -235,11 +325,11 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept } } -template -void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexcept +template +void ParallelFor (Gpu::KernelInfo const& info, BoxND const& box, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::makeExecutionConfig(box.numPts()); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * ec.numBlocks.x; @@ -257,10 +347,10 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexc { for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); + auto iv = indexer.intVect(icell); int n_active_threads = amrex::min(indexer.numPts()-icell+std::uint64_t(item.get_local_id(0)), std::uint64_t(item.get_local_range(0))); - detail::call_f(f, i, j, k, Gpu::Handler{&item, shared_data.get_multi_ptr().get(), + detail::call_f_intvect_handler(f, iv, Gpu::Handler{&item, shared_data.get_multi_ptr().get(), n_active_threads}); } }); @@ -275,8 +365,8 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexc { for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - detail::call_f(f,i,j,k,Gpu::Handler{&item}); + auto iv = indexer.intVect(icell); + detail::call_f_intvect_handler(f,iv,Gpu::Handler{&item}); } }); }); @@ -286,11 +376,11 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexc } } -template ::value> > -void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L const& f) noexcept +template ::value> > +void ParallelFor (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::makeExecutionConfig(box.numPts()); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * ec.numBlocks.x; @@ -308,10 +398,10 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L const& { for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); + auto iv = indexer.intVect(icell); int n_active_threads = amrex::min(indexer.numPts()-icell+std::uint64_t(item.get_local_id(0)), std::uint64_t(item.get_local_range(0))); - detail::call_f(f, i, j, k, ncomp, + detail::call_f_intvect_ncomp_handler(f, iv, ncomp, Gpu::Handler{&item, shared_data.get_multi_ptr().get(), n_active_threads}); } @@ -327,8 +417,8 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L const& { for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - detail::call_f(f,i,j,k,ncomp,Gpu::Handler{&item}); + auto iv = indexer.intVect(icell); + detail::call_f_intvect_ncomp_handler(f,iv,ncomp,Gpu::Handler{&item}); } }); }); @@ -371,11 +461,11 @@ void ParallelForRNG (T n, L const& f) noexcept } } -template -void 
ParallelForRNG (Box const& box, L const& f) noexcept +template +void ParallelForRNG (BoxND const& box, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::ExecutionConfig(box.numPts()); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * amrex::min(ec.numBlocks.x,Gpu::Device::maxBlocksPerLaunch()); @@ -395,8 +485,8 @@ void ParallelForRNG (Box const& box, L const& f) noexcept RandomEngine rand_eng{&engine}; for (std::uint64_t icell = tid, stride = item.get_global_range(0); icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - f(i,j,k,rand_eng); + auto iv = indexer.intVect(icell); + detail::call_f_intvect_engine(f,iv,rand_eng); } engine_acc.store(engine, tid); }); @@ -407,11 +497,11 @@ void ParallelForRNG (Box const& box, L const& f) noexcept } } -template ::value> > -void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept +template ::value> > +void ParallelForRNG (BoxND const& box, T ncomp, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::ExecutionConfig(box.numPts()); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * amrex::min(ec.numBlocks.x,Gpu::Device::maxBlocksPerLaunch()); @@ -431,10 +521,8 @@ void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept RandomEngine rand_eng{&engine}; for (std::uint64_t icell = tid, stride = item.get_global_range(0); icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - for (T n = 0; n < ncomp; ++n) { - f(i,j,k,n,rand_eng); - } + auto iv = indexer.intVect(icell); + detail::call_f_intvect_ncomp_engine(f,iv,ncomp,rand_eng); } engine_acc.store(engine, tid); }); @@ -445,12 +533,12 @@ void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept } } -template -void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (Gpu::KernelInfo const& /*info*/, BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); const auto ec = Gpu::makeExecutionConfig(std::max(box1.numPts(), box2.numPts())); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * ec.numBlocks.x; @@ -467,12 +555,12 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - f1(i,j,k); + auto iv = indexer1.intVect(icell); + detail::call_f_intvect(f1,iv); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - f2(i,j,k); + auto iv = indexer2.intVect(icell); + detail::call_f_intvect(f2,iv); } } }); @@ -482,15 +570,15 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { if 
(amrex::isEmpty(box1) && amrex::isEmpty(box2) && amrex::isEmpty(box3)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); - const BoxIndexer indexer3(box3); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); + const BoxIndexerND indexer3(box3); const auto ec = Gpu::makeExecutionConfig(std::max({box1.numPts(),box2.numPts(),box3.numPts()})); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * ec.numBlocks.x; @@ -507,16 +595,16 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - f1(i,j,k); + auto iv = indexer1.intVect(icell); + detail::call_f_intvect(f1,iv); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - f2(i,j,k); + auto iv = indexer2.intVect(icell); + detail::call_f_intvect(f2,iv); } if (icell < indexer3.numPts()) { - auto [i, j, k] = indexer3(icell); - f3(i,j,k); + auto iv = indexer3.intVect(icell); + detail::call_f_intvect(f3,iv); } } }); @@ -526,16 +614,16 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value> > void ParallelFor (Gpu::KernelInfo const& /*info*/, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); const auto ec = Gpu::makeExecutionConfig(std::max(box1.numPts(),box2.numPts())); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = std::size_t(nthreads_per_block) * ec.numBlocks.x; @@ -552,16 +640,12 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - for (T1 n = 0; n < ncomp1; ++n) { - f1(i,j,k,n); - } + auto iv = indexer1.intVect(icell); + detail::call_f_intvect_ncomp(f1,iv,ncomp1); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - for (T2 n = 0; n < ncomp2; ++n) { - f2(i,j,k,n); - } + auto iv = indexer2.intVect(icell); + detail::call_f_intvect_ncomp(f2,iv,ncomp2); } } }); @@ -571,19 +655,19 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > void ParallelFor (Gpu::KernelInfo const& /*info*/, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2) && amrex::isEmpty(box3)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); - const BoxIndexer indexer3(box3); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); + const BoxIndexerND indexer3(box3); const auto ec = Gpu::makeExecutionConfig(std::max({box1.numPts(),box2.numPts(),box3.numPts()})); const auto nthreads_per_block = ec.numThreads.x; const auto nthreads_total = 
std::size_t(nthreads_per_block) * ec.numBlocks.x; @@ -600,22 +684,16 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, for (std::uint64_t icell = item.get_global_id(0), stride = item.get_global_range(0); icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - for (T1 n = 0; n < ncomp1; ++n) { - f1(i,j,k,n); - } + auto iv = indexer1.intVect(icell); + detail::call_f_intvect_ncomp(f1,iv,ncomp1); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - for (T2 n = 0; n < ncomp2; ++n) { - f2(i,j,k,n); - } + auto iv = indexer2.intVect(icell); + detail::call_f_intvect_ncomp(f2,iv,ncomp2); } if (icell < indexer3.numPts()) { - auto [i, j, k] = indexer3(icell); - for (T3 n = 0; n < ncomp3; ++n) { - f3(i,j,k,n); - } + auto iv = indexer3.intVect(icell); + detail::call_f_intvect_ncomp(f3,iv,ncomp3); } } }); @@ -683,56 +761,6 @@ void launch (T const& n, L const& f) noexcept AMREX_GPU_ERROR_CHECK(); } -namespace detail { - template - AMREX_GPU_DEVICE - auto call_f (F const& f, N i, std::uint64_t /*nleft*/) - noexcept -> decltype(f(0)) - { - f(i); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, N i, std::uint64_t nleft) - noexcept -> decltype(f(0,Gpu::Handler{})) - { - f(i,Gpu::Handler(amrex::min(nleft,(std::uint64_t)blockDim.x))); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, std::uint64_t /*nleft*/) - noexcept -> decltype(f(0,0,0)) - { - f(i,j,k); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, std::uint64_t nleft) - noexcept -> decltype(f(0,0,0,Gpu::Handler{})) - { - f(i,j,k,Gpu::Handler(amrex::min(nleft,(std::uint64_t)blockDim.x))); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, T ncomp, std::uint64_t /*nleft*/) - noexcept -> decltype(f(0,0,0,0)) - { - for (T n = 0; n < ncomp; ++n) f(i,j,k,n); - } - - template - AMREX_GPU_DEVICE - auto call_f (F const& f, int i, int j, int k, T ncomp, std::uint64_t nleft) - noexcept -> decltype(f(0,0,0,0,Gpu::Handler{})) - { - for (T n = 0; n < ncomp; ++n) f(i,j,k,n,Gpu::Handler(amrex::min(nleft,(std::uint64_t)blockDim.x))); - } -} - template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, T n, L const& f) noexcept @@ -743,44 +771,50 @@ ParallelFor (Gpu::KernelInfo const&, T n, L const& f) noexcept [=] AMREX_GPU_DEVICE () noexcept { for (Long i = Long(blockDim.x)*blockIdx.x+threadIdx.x, stride = Long(blockDim.x)*gridDim.x; i < Long(n); i += stride) { - detail::call_f(f, T(i), (Long(n)-i+(Long)threadIdx.x)); + detail::call_f_scalar_handler(f, T(i), + Gpu::Handler(amrex::min((std::uint64_t(n)-i+(std::uint64_t)threadIdx.x), + (std::uint64_t)blockDim.x))); } }); AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value> -ParallelFor (Gpu::KernelInfo const&, Box const& box, L const& f) noexcept +ParallelFor (Gpu::KernelInfo const&, BoxND const& box, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::makeExecutionConfig(box.numPts()); AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (std::uint64_t icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - detail::call_f(f, i, j, k, (indexer.numPts()-icell+(std::uint64_t)threadIdx.x)); + auto iv = 
indexer.intVect(icell); + detail::call_f_intvect_handler(f, iv, + Gpu::Handler(amrex::min((indexer.numPts()-icell+(std::uint64_t)threadIdx.x), + (std::uint64_t)blockDim.x))); } }); AMREX_GPU_ERROR_CHECK(); } -template ::value> > +template ::value> > std::enable_if_t::value> -ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L const& f) noexcept +ParallelFor (Gpu::KernelInfo const&, BoxND const& box, T ncomp, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::makeExecutionConfig(box.numPts()); AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (std::uint64_t icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - detail::call_f(f, i, j, k, ncomp, (indexer.numPts()-icell+(std::uint64_t)threadIdx.x)); + auto iv = indexer.intVect(icell); + detail::call_f_intvect_ncomp_handler(f, iv, ncomp, + Gpu::Handler(amrex::min((indexer.numPts()-icell+(std::uint64_t)threadIdx.x), + (std::uint64_t)blockDim.x))); } }); AMREX_GPU_ERROR_CHECK(); @@ -807,13 +841,13 @@ ParallelForRNG (T n, L const& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value> -ParallelForRNG (Box const& box, L const& f) noexcept +ParallelForRNG (BoxND const& box, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } randState_t* rand_state = getRandState(); - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::ExecutionConfig(box.numPts()); AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), @@ -822,21 +856,21 @@ ParallelForRNG (Box const& box, L const& f) noexcept auto const tid = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x; RandomEngine engine{&(rand_state[tid])}; for (std::uint64_t icell = tid, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - f(i,j,k,engine); + auto iv = indexer.intVect(icell); + detail::call_f_intvect_engine(f, iv, engine); } }); Gpu::streamSynchronize(); // To avoid multiple streams using RNG AMREX_GPU_ERROR_CHECK(); } -template ::value> > +template ::value> > std::enable_if_t::value> -ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept +ParallelForRNG (BoxND const& box, T ncomp, L const& f) noexcept { if (amrex::isEmpty(box)) { return; } randState_t* rand_state = getRandState(); - const BoxIndexer indexer(box); + const BoxIndexerND indexer(box); const auto ec = Gpu::ExecutionConfig(box.numPts()); AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), @@ -845,24 +879,22 @@ ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept auto const tid = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x; RandomEngine engine{&(rand_state[tid])}; for (std::uint64_t icell = tid, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < indexer.numPts(); icell += stride) { - auto [i, j, k] = indexer(icell); - for (T n = 0; n < ncomp; ++n) { - f(i,j,k,n,engine); - } + auto iv = indexer.intVect(icell); + detail::call_f_intvect_ncomp_engine(f, iv, ncomp, engine); } }); Gpu::streamSynchronize(); // To avoid multiple streams using RNG AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, 
- Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); const auto ec = Gpu::makeExecutionConfig(std::max(box1.numPts(),box2.numPts())); AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { @@ -870,28 +902,28 @@ ParallelFor (Gpu::KernelInfo const&, for (std::uint64_t icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - f1(i,j,k); + auto iv = indexer1.intVect(icell); + detail::call_f_intvect(f1, iv); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - f2(i,j,k); + auto iv = indexer2.intVect(icell); + detail::call_f_intvect(f2, iv); } } }); AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2) && amrex::isEmpty(box3)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); - const BoxIndexer indexer3(box3); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); + const BoxIndexerND indexer3(box3); const auto ec = Gpu::makeExecutionConfig(std::max({box1.numPts(),box2.numPts(),box3.numPts()})); AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { @@ -899,33 +931,33 @@ ParallelFor (Gpu::KernelInfo const&, for (std::uint64_t icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - f1(i,j,k); + auto iv = indexer1.intVect(icell); + detail::call_f_intvect(f1, iv); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - f2(i,j,k); + auto iv = indexer2.intVect(icell); + detail::call_f_intvect(f2, iv); } if (icell < indexer3.numPts()) { - auto [i, j, k] = indexer3(icell); - f3(i,j,k); + auto iv = indexer3.intVect(icell); + detail::call_f_intvect(f3, iv); } } }); AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); const auto ec = Gpu::makeExecutionConfig(std::max(box1.numPts(),box2.numPts())); AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { @@ -933,36 +965,32 @@ ParallelFor (Gpu::KernelInfo const&, for (std::uint64_t icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < ncells; icell += stride) { if 
(icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - for (T1 n = 0; n < ncomp1; ++n) { - f1(i,j,k,n); - } + auto iv = indexer1.intVect(icell); + detail::call_f_intvect_ncomp(f1, iv, ncomp1); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - for (T2 n = 0; n < ncomp2; ++n) { - f2(i,j,k,n); - } + auto iv = indexer2.intVect(icell); + detail::call_f_intvect_ncomp(f2, iv, ncomp2); } } }); AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2) && amrex::isEmpty(box3)) { return; } - const BoxIndexer indexer1(box1); - const BoxIndexer indexer2(box2); - const BoxIndexer indexer3(box3); + const BoxIndexerND indexer1(box1); + const BoxIndexerND indexer2(box2); + const BoxIndexerND indexer3(box3); const auto ec = Gpu::makeExecutionConfig(std::max({box1.numPts(),box2.numPts(),box3.numPts()})); AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { @@ -970,22 +998,16 @@ ParallelFor (Gpu::KernelInfo const&, for (std::uint64_t icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x, stride = std::uint64_t(blockDim.x)*gridDim.x; icell < ncells; icell += stride) { if (icell < indexer1.numPts()) { - auto [i, j, k] = indexer1(icell); - for (T1 n = 0; n < ncomp1; ++n) { - f1(i,j,k,n); - } + auto iv = indexer1.intVect(icell); + detail::call_f_intvect_ncomp(f1, iv, ncomp1); } if (icell < indexer2.numPts()) { - auto [i, j, k] = indexer2(icell); - for (T2 n = 0; n < ncomp2; ++n) { - f2(i,j,k,n); - } + auto iv = indexer2.intVect(icell); + detail::call_f_intvect_ncomp(f2, iv, ncomp2); } if (icell < indexer3.numPts()) { - auto [i, j, k] = indexer3(icell); - for (T3 n = 0; n < ncomp3; ++n) { - f3(i,j,k,n); - } + auto iv = indexer3.intVect(icell); + detail::call_f_intvect_ncomp(f3, iv, ncomp3); } } }); @@ -1013,60 +1035,60 @@ ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept ParallelFor(info, n, std::forward(f)); } -template +template std::enable_if_t::value> -ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +ParallelFor (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { ParallelFor(info, box, std::forward(f)); } -template ::value> > +template ::value> > std::enable_if_t::value> -ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +ParallelFor (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(info, box, ncomp, std::forward(f)); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(info, box1, box2, std::forward(f1), std::forward(f2)); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& 
f3) noexcept { ParallelFor(info, box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(info, box1, ncomp1, std::forward(f1), box2, ncomp2, std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(info, box1, ncomp1, std::forward(f1), box2, ncomp2, std::forward(f2), @@ -1085,88 +1107,88 @@ void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept ParallelFor(info, n,std::forward(f)); } -template -void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +template +void For (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { ParallelFor(info, box,std::forward(f)); } -template -void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +template +void For (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { ParallelFor(info, box,std::forward(f)); } -template ::value> > -void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void For (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(info,box,ncomp,std::forward(f)); } -template ::value> > -void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void For (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(info,box,ncomp,std::forward(f)); } -template +template void For (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } -template +template void For (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } -template +template void For (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template +template void For (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value> > void For (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { 
ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value> > void For (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > void For (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(info, box1,ncomp1,std::forward(f1), @@ -1174,14 +1196,14 @@ void For (Gpu::KernelInfo const& info, box3,ncomp3,std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > void For (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(info, box1,ncomp1,std::forward(f1), @@ -1201,81 +1223,81 @@ void ParallelFor (T n, L&& f) noexcept ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } -template -void ParallelFor (Box const& box, L&& f) noexcept +template +void ParallelFor (BoxND const& box, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); } -template -void ParallelFor (Box const& box, L&& f) noexcept +template +void ParallelFor (BoxND const& box, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); } -template ::value> > -void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void ParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } -template ::value> > -void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void ParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } -template -void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } -template -void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void ParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } -template -void ParallelFor (Box const& box1, Box const& box2, Box const& box3, +template +void ParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template -void ParallelFor (Box const& box1, Box const& box2, Box const& box3, +template +void ParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value> > -void 
ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), @@ -1283,13 +1305,13 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > -void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void ParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), @@ -1309,81 +1331,81 @@ void For (T n, L&& f) noexcept ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); } -template -void For (Box const& box, L&& f) noexcept +template +void For (BoxND const& box, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); } -template -void For (Box const& box, L&& f) noexcept +template +void For (BoxND const& box, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); } -template ::value> > -void For (Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void For (BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } -template ::value> > -void For (Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void For (BoxND const& box, T ncomp, L&& f) noexcept { ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } -template -void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void For (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } -template -void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void For (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } -template -void For (Box const& box1, Box const& box2, Box const& box3, +template +void For (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template -void For (Box const& box1, Box const& box2, Box const& box3, +template +void For (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { 
ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), @@ -1391,13 +1413,13 @@ void For (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > -void For (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void For (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), @@ -1451,9 +1473,9 @@ HostDeviceParallelFor (T n, L&& f) noexcept HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } -template +template std::enable_if_t::value> -HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +HostDeviceParallelFor (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info, box,std::forward(f)); @@ -1466,9 +1488,9 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc } } -template +template std::enable_if_t::value> -HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +HostDeviceParallelFor (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info, box,std::forward(f)); @@ -1481,9 +1503,9 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc } } -template ::value> > +template ::value> > std::enable_if_t::value> -HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +HostDeviceParallelFor (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info, box,ncomp,std::forward(f)); @@ -1496,9 +1518,9 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& } } -template ::value> > +template ::value> > std::enable_if_t::value> -HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +HostDeviceParallelFor (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info, box,ncomp,std::forward(f)); @@ -1511,10 +1533,10 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box 
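The overloads above generalize ParallelFor/For from the 3D Box to the dimension-templated BoxND, with detail::call_f_intvect_ncomp forwarding to the callable. A minimal usage sketch (assuming the standard AMReX headers, and that, as in the dispatch helpers, a callable may take either plain (i,j,k[,n]) indices for dim <= 3 or an IntVectND for general dim):

    #include <AMReX_Gpu.H>
    #include <AMReX_Array4.H>
    #include <AMReX_Box.H>

    void parallel_for_sketch (amrex::Box const& bx, amrex::Array4<amrex::Real> const& a)
    {
        // Classic 3D-style callable still works for the default BoxND<AMREX_SPACEDIM>.
        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
        {
            a(i,j,k) = 0.0;
        });

        // For a general BoxND<dim>, the callable can take an IntVectND<dim> instead.
        amrex::BoxND<2> bx2(amrex::IntVectND<2>(0,0), amrex::IntVectND<2>(15,15));
        amrex::ParallelFor(bx2, [=] AMREX_GPU_DEVICE (amrex::IntVectND<2> const& iv) noexcept
        {
            amrex::ignore_unused(iv);
        });
    }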
const& box, T ncomp, L&& } } -template +template std::enable_if_t::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); @@ -1528,10 +1550,10 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, } } -template +template std::enable_if_t::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); @@ -1545,10 +1567,10 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, } } -template +template std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { @@ -1565,13 +1587,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, } } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); @@ -1585,13 +1607,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, } } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); @@ -1605,15 +1627,15 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, } } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { ParallelFor(info, @@ -1631,15 +1653,15 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, } } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { 
ParallelFor(info, @@ -1669,90 +1691,90 @@ void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept HostDeviceParallelFor(info,n,std::forward(f)); } -template -void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +template +void HostDeviceFor (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { HostDeviceParallelFor(info,box,std::forward(f)); } -template -void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +template +void HostDeviceFor (Gpu::KernelInfo const& info, BoxND const& box, L&& f) noexcept { HostDeviceParallelFor(info,box,std::forward(f)); } -template ::value> > -void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { HostDeviceParallelFor(info,box,ncomp,std::forward(f)); } -template ::value> > -void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, BoxND const& box, T ncomp, L&& f) noexcept { HostDeviceParallelFor(info,box,ncomp,std::forward(f)); } -template +template void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } -template +template void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept + BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } -template +template void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { HostDeviceParallelFor(info, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } -template +template void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, Box const& box2, Box const& box3, + BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { HostDeviceParallelFor(info, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value> > void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value> > void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept 
{ HostDeviceParallelFor(info, box1,ncomp1,std::forward(f1), @@ -1760,14 +1782,14 @@ void HostDeviceFor (Gpu::KernelInfo const& info, box3,ncomp3,std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > void HostDeviceFor (Gpu::KernelInfo const& info, - Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept + BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { HostDeviceParallelFor(info, box1,ncomp1,std::forward(f1), @@ -1787,83 +1809,83 @@ void HostDeviceParallelFor (T n, L&& f) noexcept HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); } -template -void HostDeviceParallelFor (Box const& box, L&& f) noexcept +template +void HostDeviceParallelFor (BoxND const& box, L&& f) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); } -template -void HostDeviceParallelFor (Box const& box, L&& f) noexcept +template +void HostDeviceParallelFor (BoxND const& box, L&& f) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); } -template ::value> > -void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void HostDeviceParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } -template ::value> > -void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +template ::value> > +void HostDeviceParallelFor (BoxND const& box, T ncomp, L&& f) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, L1&& f1, L2&& f2) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } -template -void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, +template +void HostDeviceParallelFor (BoxND const& box1, BoxND const& box2, BoxND const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2) noexcept +void HostDeviceParallelFor 
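HostDeviceParallelFor keeps its existing behavior of dispatching on Gpu::inLaunchRegion(): inside a launch region it calls the device ParallelFor, otherwise it falls back to the host loop. A sketch of forcing the host path with the existing Gpu::LaunchSafeGuard (the field name is illustrative):

    void host_fallback_sketch (amrex::Box const& bx, amrex::Array4<amrex::Real> const& a)
    {
        amrex::Gpu::LaunchSafeGuard lsg(false);   // temporarily leave the launch region
        amrex::HostDeviceParallelFor(bx,
            [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) noexcept
        {
            a(i,j,k) += 1.0;   // runs on the host here because inLaunchRegion() is false
        });
    }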
(BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), @@ -1871,13 +1893,13 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > -void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, - Box const& box2, T2 ncomp2, L2&& f2, - Box const& box3, T3 ncomp3, L3&& f3) noexcept +void HostDeviceParallelFor (BoxND const& box1, T1 ncomp1, L1&& f1, + BoxND const& box2, T2 ncomp2, L2&& f2, + BoxND const& box3, T3 ncomp3, L3&& f3) noexcept { HostDeviceParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), diff --git a/Src/Base/AMReX_GpuQualifiers.H b/Src/Base/AMReX_GpuQualifiers.H index 4fba23a849a..3e10bec54df 100644 --- a/Src/Base/AMReX_GpuQualifiers.H +++ b/Src/Base/AMReX_GpuQualifiers.H @@ -64,4 +64,22 @@ # include #endif +#define AMREX_WRONG_NUM_ARGS(...) static_assert(false,"Wrong number of arguments to macro") + +#define AMREX_GET_DGV_MACRO(_1,_2,_3,NAME,...) NAME +#define AMREX_DEVICE_GLOBAL_VARIABLE(...) AMREX_GET_DGV_MACRO(__VA_ARGS__,\ + AMREX_DGVARR, AMREX_DGV,\ + AMREX_WRONG_NUM_ARGS)(__VA_ARGS__) + +#ifdef AMREX_USE_SYCL +# define AMREX_DGV(type,name) SYCL_EXTERNAL sycl::ext::oneapi::experimental::device_global name +# define AMREX_DGVARR(type,num,name) SYCL_EXTERNAL sycl::ext::oneapi::experimental::device_global name +#elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +# define AMREX_DGV(type,name) __device__ type name +# define AMREX_DGVARR(type,num,name) __device__ type name[num] +#else +# define AMREX_DGV(type,name) type name +# define AMREX_DGVARR(type,num,name) type name[num] +#endif + #endif diff --git a/Src/Base/AMReX_GpuRange.H b/Src/Base/AMReX_GpuRange.H index ecf9a32fd2e..0b7fcd5a9c8 100644 --- a/Src/Base/AMReX_GpuRange.H +++ b/Src/Base/AMReX_GpuRange.H @@ -13,7 +13,8 @@ namespace amrex { template ,int> = 0> bool isEmpty (T n) noexcept { return n <= 0; } -AMREX_FORCE_INLINE bool isEmpty (Box const& b) noexcept { return b.isEmpty(); } +template +AMREX_FORCE_INLINE bool isEmpty (BoxND const& b) noexcept { return b.isEmpty(); } namespace Gpu { @@ -28,9 +29,10 @@ template ,int> = 0> AMREX_GPU_HOST_DEVICE Long at (T const& /*b*/, Long offset) noexcept { return offset; } -//! Box version +//! 
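The AMREX_DEVICE_GLOBAL_VARIABLE macro added in AMReX_GpuQualifiers.H dispatches on argument count: two arguments declare a scalar device global (AMREX_DGV), three declare a fixed-size array (AMREX_DGVARR), with backend-specific expansions for SYCL, CUDA/HIP, and host builds. A declaration sketch (the names dg_x and dg_vec are made up for illustration):

    AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, dg_x);       // scalar: expands via AMREX_DGV
    AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Real, 4, dg_vec);   // array of 4: expands via AMREX_DGVARR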
BoxND version +template AMREX_GPU_HOST_DEVICE -AMREX_FORCE_INLINE Long size (Box const& b) noexcept +AMREX_FORCE_INLINE Long size (BoxND const& b) noexcept { AMREX_IF_ON_DEVICE((return b.numPts();)) AMREX_IF_ON_HOST(( @@ -39,19 +41,13 @@ AMREX_FORCE_INLINE Long size (Box const& b) noexcept )) } +template AMREX_GPU_HOST_DEVICE -AMREX_FORCE_INLINE Box at (Box const& b, Long offset) noexcept +AMREX_FORCE_INLINE BoxND at (BoxND const& b, Long offset) noexcept { AMREX_IF_ON_DEVICE(( - auto len = b.length3d(); - Long k = offset / (len[0]*len[1]); - Long j = (offset - k*(len[0]*len[1])) / len[0]; - Long i = (offset - k*(len[0]*len[1])) - j*len[0]; - IntVect iv{AMREX_D_DECL(static_cast(i), - static_cast(j), - static_cast(k))}; - iv += b.smallEnd(); - return Box(iv,iv,b.type()); + auto iv = b.atOffset(offset); + return BoxND(iv,iv,b.ixType()); )) AMREX_IF_ON_HOST(( amrex::ignore_unused(offset); diff --git a/Src/Base/AMReX_GpuTypes.H b/Src/Base/AMReX_GpuTypes.H index ecb992983ba..e01303656ff 100644 --- a/Src/Base/AMReX_GpuTypes.H +++ b/Src/Base/AMReX_GpuTypes.H @@ -84,7 +84,7 @@ struct Handler struct Handler { - AMREX_GPU_HOST_DEVICE constexpr Handler (int n_active_threads = -1) + AMREX_GPU_HOST_DEVICE constexpr explicit Handler (int n_active_threads = -1) : numActiveThreads(n_active_threads) {} AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Src/Base/AMReX_IArrayBox.H b/Src/Base/AMReX_IArrayBox.H index db0f26d5080..b070f5c1f96 100644 --- a/Src/Base/AMReX_IArrayBox.H +++ b/Src/Base/AMReX_IArrayBox.H @@ -94,7 +94,6 @@ public: void readFrom (std::istream& is); - //! Initialize from ParmParse with "fab" prefix. static void Initialize (); static void Finalize (); diff --git a/Src/Base/AMReX_IArrayBox.cpp b/Src/Base/AMReX_IArrayBox.cpp index 40b2762729d..1b9eea50658 100644 --- a/Src/Base/AMReX_IArrayBox.cpp +++ b/Src/Base/AMReX_IArrayBox.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -32,7 +31,6 @@ void IArrayBox::Initialize () { if (initialized) { return; } -// ParmParse pp("iab"); ifabio = std::make_unique(); diff --git a/Src/Base/AMReX_IndexType.H b/Src/Base/AMReX_IndexType.H index 0fd613d2a99..ea310b9b430 100644 --- a/Src/Base/AMReX_IndexType.H +++ b/Src/Base/AMReX_IndexType.H @@ -3,145 +3,364 @@ #define BL_INDEXTYPE_H #include -#include #include #include +#include #include namespace amrex { +/** +* \brief Type for defining CellIndex so that all IndexTypeND with different dimensions +* have the same CellIndex type. +*/ +struct CellIndexEnum { + //! The cell index type: one of CELL or NODE. + enum CellIndex { CELL = 0, NODE = 1 }; +}; + /** * \brief Cell-Based or Node-Based Indices * -* The class IndexType defines an index as being cell based or node (edge) -* based in each of the AMREX_SPACEDIM directions. This class defines an +* The class IndexTypeND defines an index as being cell based or node (edge) +* based in each of the dim directions. This class defines an * enumerated type CellIndex to be either CELL or NODE; i.e. each of the -* AMREX_SPACEDIM dimensions must be either CELL or NODE. +* dim dimensions must be either CELL or NODE. */ -class IndexType +template +class IndexTypeND : public CellIndexEnum { - friend MPI_Datatype ParallelDescriptor::Mpi_typemap::type(); - public: - //! The cell index type: one of CELL or NODE. - enum CellIndex { CELL = 0, NODE = 1 }; + static_assert(1 <= dim && dim <= 31, "The number of dimensions of IndexTypeND must be positive" + " and less than 32"); + //! 
The default constructor AMREX_GPU_HOST_DEVICE - constexpr IndexType () noexcept = default; - //! Construct an IndexType identical to an IntVect. + constexpr IndexTypeND () noexcept = default; + //! Construct an IndexTypeND identical to an IntVectND. AMREX_GPU_HOST_DEVICE - explicit IndexType (const IntVect& iv) noexcept - : itype(AMREX_D_TERM((iv[0]?1:0), | ((iv[1]?1:0)<<1), | ((iv[2]?1:0)<<2))) - {} + explicit IndexTypeND (const IntVectND& iv) noexcept { + for (int i=0; i, + int> = 0> AMREX_GPU_HOST_DEVICE - constexpr IndexType (AMREX_D_DECL(CellIndex i, CellIndex j, CellIndex k)) noexcept - : itype(AMREX_D_TERM(i, | (j<<1), | (k<<2))) - {} + constexpr IndexTypeND (CellIndex i, Args...js) noexcept { + CellIndex locarr[dim] = {i, static_cast(js)...}; + for (int s=0; s> dir); } - //! Return an integer representing the IndexType in direction dir. + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr + CellIndex ixType (int dir) const noexcept { return (CellIndex) ((itype & (1U<> dir); } + //! Return an integer representing the IndexTypeND in direction dir. [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int operator[] (int dir) const noexcept { return test(dir); } - //! Fill an IntVect of size AMREX_SPACEDIM with IndexTypes. + //! Returns the i'th CellIndex of the IndexTypeND. Used by structured bindings. + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr + CellIndex get () const noexcept { static_assert(0<=i && i>1)&1, (itype>>2)&1)); } - //! Fill an IntVect of size AMREX_SPACEDIM with IndexTypes. + IntVectND ixType () const noexcept { + IntVectND retval(0); + for (int i=0; i>1)&1, (itype>>2)&1)); } + IntVectND toIntVect () const noexcept { + IntVectND retval(0); + for (int i=0; i TheCellType () noexcept { + return IndexTypeND{}; } /** - * \brief This static member function returns an IndexType object of value - * IndexType::NODE. It is provided as a convenience to our users + * \brief This static member function returns an IndexTypeND object of value + * IndexTypeND::NODE. It is provided as a convenience to our users * when defining a Box all of whose faces should be of type - * IndexType::NODE. + * IndexTypeND::NODE. + */ + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr IndexTypeND TheNodeType () noexcept { + IndexTypeND retval{}; + retval.setall(); + return retval; + } + + //! Return the size of this IndexTypeND. + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr std::size_t size () noexcept { + return static_cast(dim); + } + + //! Return the size of this IndexTypeND. + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int isize () noexcept { + return dim; + } + + using value_type = CellIndex; + + /** + * \brief Returns a new IndexTypeND of size new_dim and + * assigns the first new_dim values of this IndexTypeND to it. + */ + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + IndexTypeND shrink () const noexcept { + static_assert(new_dim <= dim); + IndexTypeND retval{}; + retval.getBits() = itype & ((1U << new_dim) - 1); + return retval; + } + + /** + * \brief Returns a new IndexTypeND of size new_dim and + * assigns all values of this IndexTypeND to it and fill_extra to the remaining elements. 
+ */ + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + IndexTypeND expand (CellIndex fill_extra=CellIndex::CELL) const noexcept { + static_assert(new_dim >= dim); + IndexTypeND retval{}; + retval.getBits() = itype; + if (fill_extra == CellIndex::NODE) { + retval.getBits() |= (1U << new_dim) - (1U << dim); + } + return retval; + } + + /** + * \brief Returns a new IndexTypeND of size new_dim + * by either shrinking or expanding this IndexTypeND */ + template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - static constexpr IndexType TheNodeType () noexcept { - return IndexType(AMREX_D_DECL(IndexType::NODE, - IndexType::NODE, - IndexType::NODE)); + IndexTypeND resize (CellIndex fill_extra=CellIndex::CELL) const noexcept { + if constexpr (new_dim > dim) { + return expand(fill_extra); + } else { + return shrink(); + } } + //! Return the bit field representing the underlying data + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr + unsigned int& getBits () noexcept { return itype; } + + //! Return the bit field representing the underlying data + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr + const unsigned int& getBits () const noexcept { return itype; } + private: //! Returns 1<> (std::istream& is, IndexType& itype); +using IndexType = IndexTypeND; + +// Template deduction guide for IndexTypeND +template +AMREX_GPU_HOST_DEVICE // __device__ for HIP +IndexTypeND(const IntVectND&) -> IndexTypeND; + +// Template deduction guide for IndexTypeND +template , + int> = 0> +AMREX_GPU_HOST_DEVICE // __device__ for HIP +IndexTypeND(IndexType::CellIndex, Args...) -> IndexTypeND; + +namespace detail { + std::ostream& index_type_write (std::ostream& os, const unsigned int& iv, int dim); + std::istream& index_type_read (std::istream& is, unsigned int& iv, int dim); + + template + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr + T IndexTypeSplit_imp (T& retval, std::index_sequence, unsigned int src) noexcept { + int dim_shift = 0; + ( + ( + amrex::get(retval).getBits() = + (src >> dim_shift) & ((1U << amrex::get(retval).isize()) - 1), + dim_shift += amrex::get(retval).isize() + ), ... + ); + return retval; + } +} + +//! Write an IndexTypeND to an ostream in ASCII. +template +std::ostream& operator<< (std::ostream& os, const IndexTypeND& it) { + return detail::index_type_write(os, it.getBits(), dim); +} +//! Read an IndexTypeND from an istream. +template +std::istream& operator>> (std::istream& is, IndexTypeND& it) { + return detail::index_type_read(is, it.getBits(), dim); +} + +/** +* \brief Returns a IndexTypeND obtained by concatenating the input IndexTypeNDs. +* The dimension of the return value equals the sum of the dimensions of the inputted IndexTypeNDs. +*/ +template +[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +constexpr IndexTypeND()> +IndexTypeCat (const IndexTypeND& v, const IndexTypeND&...vects) noexcept { + IndexTypeND()> retval{}; + retval.getBits() |= v.getBits(); + int dim_shift = v.isize(); + ( + ( + retval.getBits() |= (vects.getBits() << dim_shift), + dim_shift += vects.isize() + ), ... + ); + return retval; +} + +/** +* \brief Returns a tuple of IndexTypeND obtained by splitting the input IndexTypeND +* according to the dimensions specified by the template arguments. 
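IndexTypeND<dim> stores one CELL/NODE bit per direction in a single unsigned int, which is why the class static_asserts 1 <= dim <= 31 and exposes getBits(). A small interface sketch (inside some function body):

    amrex::IndexTypeND<3> t(amrex::IndexType::CELL, amrex::IndexType::NODE, amrex::IndexType::CELL);
    bool dir1_is_node = t.test(1);                              // true
    auto nodal        = amrex::IndexTypeND<3>::TheNodeType();   // NODE in every direction
    auto first_two    = t.shrink<2>();                          // IndexTypeND<2>: (CELL, NODE)
    auto [a, b, c]    = t;                                      // structured bindings via get<i>()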
+*/ +template +[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +constexpr GpuTuple, IndexTypeND...> +IndexTypeSplit (const IndexTypeND()>& v) noexcept { + GpuTuple, IndexTypeND...> retval{}; + return detail::IndexTypeSplit_imp(retval, + std::make_index_sequence<1 + sizeof...(dims)>(), + v.getBits()); +} + +/** +* \brief Returns a new IndexTypeND of size new_dim and +* assigns the first new_dim values of v to it. +*/ +template +[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +constexpr IndexTypeND +IndexTypeShrink (const IndexTypeND& v) noexcept { + return v.template shrink(); +} + +/** +* \brief Returns a new IndexTypeND of size new_dim and +* assigns all values of iv to it and fill_extra to the remaining elements. +*/ +template +[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +constexpr IndexTypeND +IndexTypeExpand (const IndexTypeND& v, + IndexType::CellIndex fill_extra=IndexType::CellIndex::CELL) noexcept { + return v.template expand(fill_extra); +} +/** +* \brief Returns a new IndexTypeND of size new_dim +* by either shrinking or expanding iv. +*/ +template +[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +constexpr IndexTypeND +IndexTypeResize (const IndexTypeND& v, + IndexType::CellIndex fill_extra=IndexType::CellIndex::CELL) noexcept { + return v.template resize(fill_extra); } +} // namespace amrex + +// Spcialize std::tuple_size for IndexTypeND. Used by structured bindings. +template +struct std::tuple_size> { + static constexpr std::size_t value = dim; +}; + +// Spcialize std::tuple_element for IndexTypeND. Used by structured bindings. +template +struct std::tuple_element> { + using type = typename amrex::IndexTypeND::CellIndex; +}; + #endif /*BL_INDEXTYPE_H*/ diff --git a/Src/Base/AMReX_IndexType.cpp b/Src/Base/AMReX_IndexType.cpp index e96052a4d14..e2cd7097ecf 100644 --- a/Src/Base/AMReX_IndexType.cpp +++ b/Src/Base/AMReX_IndexType.cpp @@ -4,16 +4,16 @@ #include #include -namespace amrex { +namespace amrex::detail { std::ostream& -operator<< (std::ostream& os, - const IndexType& it) +index_type_write (std::ostream& os, const unsigned int& iv, int dim) { - os << '(' - << AMREX_D_TERM( (it.test(0)?'N':'C'), - << ',' << (it.test(1)?'N':'C'), - << ',' << (it.test(2)?'N':'C')) << ')' << std::flush; + os << '(' << (((iv & 1U) != 0) ? 'N' : 'C'); + for (int i=1; i> (std::istream& is, - IndexType& it) +index_type_read (std::istream& is, unsigned int& iv, int dim) { - char AMREX_D_DECL(t0,t1,t2); - - AMREX_D_EXPR( is.ignore(BL_IGNORE_MAX, '(') >> t0, - is.ignore(BL_IGNORE_MAX, ',') >> t1, - is.ignore(BL_IGNORE_MAX, ',') >> t2); + char t = '0'; + is.ignore(BL_IGNORE_MAX, '(') >> t; + BL_ASSERT(t == 'C' || t == 'N'); + t == 'N' ? (iv |= 1U) : (iv &= ~1U); + for (int i=1; i> t; + BL_ASSERT(t == 'C' || t == 'N'); + t == 'N' ? 
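IndexTypeCat and IndexTypeSplit are the IndexTypeND analogues of the existing IntVectCat/IntVectSplit helpers: concatenation ORs the bit fields together with a running shift, and splitting reads them back out. For example (a sketch based on the semantics documented above):

    amrex::IndexTypeND<1> ta(amrex::IndexType::NODE);
    amrex::IndexTypeND<2> tb(amrex::IndexType::CELL, amrex::IndexType::NODE);
    auto tcat     = amrex::IndexTypeCat(ta, tb);          // IndexTypeND<3>: (NODE, CELL, NODE)
    auto [t1, t2] = amrex::IndexTypeSplit<1, 2>(tcat);    // back to an IndexTypeND<1> and an IndexTypeND<2>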
(iv |= (1U << i)) : (iv &= ~(1U << i)); + } is.ignore(BL_IGNORE_MAX, ')'); - AMREX_D_TERM( - BL_ASSERT(t0 == 'C' || t0 == 'N'); t0=='N'?it.set(0):it.unset(0); , - BL_ASSERT(t1 == 'C' || t1 == 'N'); t1=='N'?it.set(1):it.unset(1); , - BL_ASSERT(t2 == 'C' || t2 == 'N'); t2=='N'?it.set(2):it.unset(2)); if (is.fail()) { amrex::Error("operator>>(ostream&,IndexType&) failed"); diff --git a/Src/Base/AMReX_IntVect.H b/Src/Base/AMReX_IntVect.H index 2cf48053cd2..6452f29b036 100644 --- a/Src/Base/AMReX_IntVect.H +++ b/Src/Base/AMReX_IntVect.H @@ -167,6 +167,18 @@ public: } } + template = 0> + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Dim3 dim3 ([[maybe_unused]] int fill_extra) const noexcept { + if constexpr (dim == 1) { + return Dim3{vect[0],fill_extra,fill_extra}; + } else if constexpr (dim == 2) { + return Dim3{vect[0],vect[1],fill_extra}; + } else { + return Dim3{vect[0],vect[1],vect[2]}; + } + } + #if __cplusplus >= 201402L template< typename T = int > AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -1108,7 +1120,7 @@ namespace detail { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T IntVectSplit_imp (T& retval, std::index_sequence, const int * src) noexcept { - (IntVectSplit_imp2(std::get(retval), src), ...); + (IntVectSplit_imp2(amrex::get(retval), src), ...); return retval; } diff --git a/Src/Base/AMReX_IntegratorBase.H b/Src/Base/AMReX_IntegratorBase.H index d9af8053d7e..af29dc82cca 100644 --- a/Src/Base/AMReX_IntegratorBase.H +++ b/Src/Base/AMReX_IntegratorBase.H @@ -165,31 +165,25 @@ protected: /** * \brief Rhs is the right-hand-side function the integrator will use. */ - std::function Rhs; + std::function Rhs; /** * \brief RhsIm is the implicit right-hand-side function an ImEx integrator * will use. */ - std::function RhsIm; + std::function RhsIm; /** * \brief RhsEx is the explicit right-hand-side function an ImEx integrator * will use. */ - std::function RhsEx; + std::function RhsEx; /** * \brief RhsFast is the fast timescale right-hand-side function a multirate * integrator will use. */ - std::function RhsFast; - - /** - * \brief The pre_rhs_action function is called by the integrator on state - * data before using it to evaluate a right-hand side. 
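The new dim3(fill_extra) overload on IntVectND pads the components that do not exist for dim < 3 with a caller-chosen value rather than an implicit default, which is convenient when building Dim3 loop bounds. A quick sketch:

    amrex::IntVectND<2> iv(7, 11);
    amrex::Dim3 d0 = iv.dim3(0);    // {7, 11, 0}
    amrex::Dim3 d1 = iv.dim3(-1);   // {7, 11, -1}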
- */ - std::function pre_rhs_action; + std::function RhsFast; /** * \brief The post_stage_action function is called by the integrator on @@ -283,28 +277,23 @@ public: virtual ~IntegratorBase () = default; - void set_rhs (std::function F) + void set_rhs (std::function F) { Rhs = F; } - void set_imex_rhs (std::function Fi, - std::function Fe) + void set_imex_rhs (std::function Fi, + std::function Fe) { RhsIm = Fi; RhsEx = Fe; } - void set_fast_rhs (std::function F) + void set_fast_rhs (std::function F) { RhsFast = F; } - void set_pre_rhs_action (std::function A) - { - pre_rhs_action = A; - } - void set_post_stage_action (std::function A) { post_stage_action = A; @@ -325,12 +314,6 @@ public: post_fast_step_action = A; } - void set_post_update (std::function A) - { - set_post_stage_action(A); - set_post_step_action(A); - } - amrex::Real get_time_step () { return time_step; diff --git a/Src/Base/AMReX_Loop.H b/Src/Base/AMReX_Loop.H index 076985caac3..fe76b8c9882 100644 --- a/Src/Base/AMReX_Loop.H +++ b/Src/Base/AMReX_Loop.H @@ -7,6 +7,117 @@ namespace amrex { +namespace loop_detail { + + // call_f_intvect_inner + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND<1> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], 0, 0, args...); + } + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND<2> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], iv[1], 0, args...); + } + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv, args...)) + { + f(iv, args...); + } + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_inner (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv[Ns]..., args...)) + { + f(iv[Ns]..., args...); + } + + // call_f_intvect + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect (F const& f, IntVectND iv) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv); + } + + // call_f_intvect_ncomp + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto call_f_intvect_ncomp (F const& f, IntVectND iv, int n) + noexcept -> decltype(call_f_intvect_inner(std::make_index_sequence(), f, iv, 0)) + { + call_f_intvect_inner(std::make_index_sequence(), f, iv, n); + } + + // call_f_intvect_inner_cpu + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner_cpu (std::index_sequence, F const& f, IntVectND<1> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], 0, 0, args...); + } + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner_cpu (std::index_sequence, F const& f, IntVectND<2> iv, Args...args) + noexcept -> decltype(f(0, 0, 0, args...)) + { + f(iv[0], iv[1], 0, args...); + } + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner_cpu (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv, args...)) + { + f(iv, args...); + } + + template + AMREX_FORCE_INLINE + auto call_f_intvect_inner_cpu (std::index_sequence, F const& f, IntVectND iv, Args...args) + noexcept -> decltype(f(iv[Ns]..., args...)) + { + f(iv[Ns]..., args...); + } + + // call_f_intvect_cpu + + template + AMREX_FORCE_INLINE + auto call_f_intvect_cpu (F const& f, IntVectND iv) 
+ noexcept -> decltype(call_f_intvect_inner_cpu(std::make_index_sequence(), f, iv)) + { + call_f_intvect_inner_cpu(std::make_index_sequence(), f, iv); + } + + // call_f_intvect_ncomp_cpu + + template + AMREX_FORCE_INLINE + auto call_f_intvect_ncomp_cpu (F const& f, IntVectND iv, int n) + noexcept -> decltype(call_f_intvect_inner_cpu(std::make_index_sequence(), f, iv, 0)) + { + call_f_intvect_inner_cpu(std::make_index_sequence(), f, iv, n); + } +} + template AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR @@ -59,64 +170,178 @@ void LoopConcurrent (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept }}}} } -template +namespace loop_detail { + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void Loop_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect(f,iv); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect(f,iv); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect(f,iv); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + Loop_impND(f, lo, hi, iv); + } + } +} + +} + +template AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR -void Loop (Box const& bx, F const& f) noexcept +void Loop (BoxND const& bx, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k); - }}} + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; + loop_detail::Loop_impND(f, lo, hi, iv); } -template +namespace loop_detail { + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void Loop_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, int n) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp(f,iv,n); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp(f,iv,n); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp(f,iv,n); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + Loop_impND(f, lo, hi, iv, n); + } + } +} + +} + +template AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR -void Loop (Box const& bx, int ncomp, F const& f) noexcept +void Loop (BoxND const& bx, int ncomp, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k,n); - }}}} + loop_detail::Loop_impND(f, lo, hi, iv, n); + } } -template +namespace 
loop_detail { + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void LoopConcurrent_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect(f,iv); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect(f,iv); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect(f,iv); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + LoopConcurrent_impND(f, lo, hi, iv); + } + } +} + +} + +template AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR -void LoopConcurrent (Box const& bx, F const& f) noexcept +void LoopConcurrent (BoxND const& bx, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k); - }}} + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; + loop_detail::LoopConcurrent_impND(f, lo, hi, iv); } -template +namespace loop_detail { + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void LoopConcurrent_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, int n) noexcept +{ + if constexpr (idim == 1) { + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp(f,iv,n); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp(f,iv,n); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp(f,iv,n); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + LoopConcurrent_impND(f, lo, hi, iv, n); + } + } +} + +} + +template AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR -void LoopConcurrent (Box const& bx, int ncomp, F const& f) noexcept +void LoopConcurrent (BoxND const& bx, int ncomp, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k,n); - }}}} + loop_detail::LoopConcurrent_impND(f, lo, hi, iv, n); + } } // The functions above are __host__ __device__ functions. 
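The loop_detail::call_f_intvect* helpers above let the same Loop/LoopConcurrent/LoopOnCpu templates accept either the classic (i,j,k[,n]) callable (for dim <= 3, missing indices are passed as 0) or a callable taking an IntVectND<dim> for arbitrary dim. A sketch, assuming a standard Array4 field:

    void loop_sketch (amrex::Box const& bx, amrex::Array4<amrex::Real> const& a)
    {
        // Classic callable: unchanged for the default BoxND<AMREX_SPACEDIM>.
        amrex::LoopOnCpu(bx, [&] (int i, int j, int k) { a(i,j,k) += 1.0; });

        // Dimension-agnostic callable: receives the IntVectND directly.
        amrex::BoxND<2> bx2(amrex::IntVectND<2>(0,0), amrex::IntVectND<2>(7,7));
        amrex::LoopOnCpu(bx2, [&] (amrex::IntVectND<2> const& iv) {
            amrex::ignore_unused(iv);
        });
    }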
If f is not a @@ -172,60 +397,174 @@ void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept }}}} } -template +namespace loop_detail { + +template +AMREX_FORCE_INLINE +void LoopOnCpu_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_cpu(f,iv); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_cpu(f,iv); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_cpu(f,iv); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + LoopOnCpu_impND(f, lo, hi, iv); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void LoopOnCpu (Box const& bx, F const& f) noexcept +void LoopOnCpu (BoxND const& bx, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k); - }}} + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; + loop_detail::LoopOnCpu_impND(f, lo, hi, iv); } -template +namespace loop_detail { + +template +AMREX_FORCE_INLINE +void LoopOnCpu_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, int n) noexcept +{ + if constexpr (idim == 1) { + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_cpu(f,iv,n); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_cpu(f,iv,n); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_cpu(f,iv,n); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + LoopOnCpu_impND(f, lo, hi, iv, n); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void LoopOnCpu (Box const& bx, int ncomp, F const& f) noexcept +void LoopOnCpu (BoxND const& bx, int ncomp, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k,n); - }}}} + loop_detail::LoopOnCpu_impND(f, lo, hi, iv, n); + } } -template +namespace loop_detail { + +template +AMREX_FORCE_INLINE +void LoopConcurrentOnCpu_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv) noexcept +{ + if constexpr (idim == 1) { + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_cpu(f,iv); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + 
call_f_intvect_cpu(f,iv); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_cpu(f,iv); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + LoopConcurrentOnCpu_impND(f, lo, hi, iv); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void LoopConcurrentOnCpu (Box const& bx, F const& f) noexcept +void LoopConcurrentOnCpu (BoxND const& bx, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k); - }}} + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; + loop_detail::LoopConcurrentOnCpu_impND(f, lo, hi, iv); } -template +namespace loop_detail { + +template +AMREX_FORCE_INLINE +void LoopConcurrentOnCpu_impND (L const& f, IntVectND const lo, IntVectND const hi, IntVectND iv, int n) noexcept +{ + if constexpr (idim == 1) { + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_cpu(f,iv,n); + } + } else if constexpr (idim == 2) { + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_cpu(f,iv,n); + }} + } else if constexpr (idim == 3) { + for (int i2 = lo[2], h2 = hi[2]; i2 <= h2; ++i2) { iv[2] = i2; + for (int i1 = lo[1], h1 = hi[1]; i1 <= h1; ++i1) { iv[1] = i1; + AMREX_PRAGMA_SIMD + for (int i0 = lo[0], h0 = hi[0]; i0 <= h0; ++i0) { iv[0] = i0; + call_f_intvect_ncomp_cpu(f,iv,n); + }}} + } else { + for (int id = lo[idim-1], hd = hi[idim-1]; id <= hd; ++id) { iv[idim-1] = id; + LoopConcurrentOnCpu_impND(f, lo, hi, iv, n); + } + } +} + +} + +template AMREX_ATTRIBUTE_FLATTEN_FOR -void LoopConcurrentOnCpu (Box const& bx, int ncomp, F const& f) noexcept +void LoopConcurrentOnCpu (BoxND const& bx, int ncomp, F const& f) noexcept { - const auto lo = amrex::lbound(bx); - const auto hi = amrex::ubound(bx); + const auto lo = amrex::lbound_iv(bx); + const auto hi = amrex::ubound_iv(bx); + IntVectND iv; for (int n = 0; n < ncomp; ++n) { - for (int k = lo.z; k <= hi.z; ++k) { - for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD - for (int i = lo.x; i <= hi.x; ++i) { - f(i,j,k,n); - }}}} + loop_detail::LoopConcurrentOnCpu_impND(f, lo, hi, iv, n); + } } // Implementation of "constexpr for" based on diff --git a/Src/Base/AMReX_MFIter.H b/Src/Base/AMReX_MFIter.H index 7f0ca4d3539..fd21b0ee7c2 100644 --- a/Src/Base/AMReX_MFIter.H +++ b/Src/Base/AMReX_MFIter.H @@ -167,6 +167,8 @@ public: static int allowMultipleMFIters (int allow); + static int currentDepth (); + void Finalize (); protected: diff --git a/Src/Base/AMReX_MFIter.cpp b/Src/Base/AMReX_MFIter.cpp index f68ab9ce35f..8180318dadd 100644 --- a/Src/Base/AMReX_MFIter.cpp +++ b/Src/Base/AMReX_MFIter.cpp @@ -17,6 +17,17 @@ MFIter::allowMultipleMFIters (int allow) return allow; } +int +MFIter::currentDepth () +{ + int r; +#ifdef AMREX_USE_OMP +#pragma omp atomic read +#endif + r = MFIter::depth; + return r; +} + MFIter::MFIter (const FabArrayBase& fabarray_, unsigned char flags_) : @@ -222,13 +233,6 @@ MFIter::Finalize () // mark as invalid currentIndex = endIndex; -#ifdef AMREX_USE_OMP 
-#pragma omp master -#endif - { - depth = 0; - } - #ifdef BL_USE_TEAM if ( ! (flags & NoTeamBarrier) ) ParallelDescriptor::MyTeam().MemoryBarrier(); @@ -257,6 +261,13 @@ MFIter::Finalize () if (m_fa) { m_fa.reset(nullptr); } + +#ifdef AMREX_USE_OMP +#pragma omp master +#endif + { + depth = 0; + } } void diff --git a/Src/Base/AMReX_Machine.cpp b/Src/Base/AMReX_Machine.cpp index 97de84f30c8..a7cc0d3993e 100644 --- a/Src/Base/AMReX_Machine.cpp +++ b/Src/Base/AMReX_Machine.cpp @@ -327,9 +327,9 @@ class Machine void get_params () { - ParmParse pp("machine"); - pp.queryAdd("verbose", flag_verbose); - pp.queryAdd("very_verbose", flag_very_verbose); + ParmParse pp("amrex.machine"); + pp.query("verbose", flag_verbose); + pp.query("very_verbose", flag_very_verbose); } static std::string get_env_str (const std::string& env_key) diff --git a/Src/Base/AMReX_Math.H b/Src/Base/AMReX_Math.H index 42762001268..c4d8d524af0 100644 --- a/Src/Base/AMReX_Math.H +++ b/Src/Base/AMReX_Math.H @@ -274,7 +274,7 @@ std::uint64_t umulhi (std::uint64_t a, std::uint64_t b) /// struct FastDivmodU64 { - std::uint64_t divisor; + std::uint64_t divisor = 0; #ifdef AMREX_INT128_SUPPORTED std::uint64_t multiplier = 1U; @@ -324,6 +324,9 @@ struct FastDivmodU64 #endif + /// Default construct an invalid FastDivmodU64 + FastDivmodU64 () = default; + /// Returns the quotient of floor(dividend / divisor) [[nodiscard]] AMREX_GPU_HOST_DEVICE std::uint64_t divide (std::uint64_t dividend) const diff --git a/Src/Base/AMReX_MemPool.H b/Src/Base/AMReX_MemPool.H index 9a3b92fae04..5a87793aa8f 100644 --- a/Src/Base/AMReX_MemPool.H +++ b/Src/Base/AMReX_MemPool.H @@ -2,8 +2,12 @@ #define BL_MEMPOOL_H #include +#include #include +#include +#include + extern "C" { void amrex_mempool_init (); void amrex_mempool_finalize (); @@ -14,4 +18,40 @@ extern "C" { void amrex_array_init_snan (amrex_real* p, size_t nelems); } +namespace amrex { + template || std::is_same_v, int> FOO = 0> + void fill_snan (T* p, std::size_t nelems) + { + if (p == nullptr || nelems == 0) { return; } +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && run_on == RunOn::Device) { + amrex::ParallelFor(nelems, [=] AMREX_GPU_DEVICE (Long i) noexcept + { + p[i] = std::numeric_limits::signaling_NaN(); + }); + } else +#endif + { + if constexpr (std::is_same_v) { +#ifdef UINT32_MAX + const uint32_t snan = UINT32_C(0x7fa00000); + static_assert(sizeof(float) == sizeof(uint32_t), "MemPool: sizeof float != sizeof uint32_t"); + for (size_t i = 0; i < nelems; ++i) { + std::memcpy(p++, &snan, sizeof(float)); + } +#endif + } else if constexpr (std::is_same_v) { +#ifdef UINT64_MAX + const uint64_t snan = UINT64_C(0x7ff0000080000001); + static_assert(sizeof(double) == sizeof(uint64_t), "MemPool: sizeof double != sizeof uint64_t"); + for (size_t i = 0; i < nelems; ++i) { + std::memcpy(p++, &snan, sizeof(double)); + } +#endif + } + } + } +} + #endif diff --git a/Src/Base/AMReX_MemPool.cpp b/Src/Base/AMReX_MemPool.cpp index e1e26ce7b6d..3d4bfde5247 100644 --- a/Src/Base/AMReX_MemPool.cpp +++ b/Src/Base/AMReX_MemPool.cpp @@ -23,11 +23,6 @@ using namespace amrex; namespace { Vector > the_memory_pool; -#if defined(AMREX_TESTING) || defined(AMREX_DEBUG) - int init_snan = 1; -#else - int init_snan = 0; -#endif bool initialized = false; } @@ -41,9 +36,6 @@ void amrex_mempool_init () initialized = true; - ParmParse pp("fab"); - pp.queryAdd("init_snan", init_snan); - int nthreads = OpenMP::get_max_threads(); the_memory_pool.resize(nthreads); @@ -109,31 +101,12 @@ void amrex_mempool_get_stats (int& mp_min, 
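MFIter::currentDepth() exposes the nesting-depth counter (read under `omp atomic read`) so code outside the iterator can check whether an MFIter loop is currently active. A possible call-site sketch:

    if (amrex::MFIter::currentDepth() == 0) {
        // no MFIter is active here, so it is safe to do work that must not
        // run inside an MFIter/tiling loop
    }

Separately, note that the machine verbosity knobs queried above now live under the amrex.machine ParmParse prefix, e.g. `amrex.machine.verbose = 1` in an inputs file, instead of the old `machine` prefix.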
int& mp_max, int& mp_tot) // min, max void amrex_real_array_init (Real* p, size_t nelems) { - if (init_snan) { amrex_array_init_snan(p, nelems); } + if (amrex::InitSNaN()) { amrex_array_init_snan(p, nelems); } } void amrex_array_init_snan (Real* p, size_t nelems) { -#ifdef BL_USE_DOUBLE - -#ifdef UINT64_MAX - const uint64_t snan = UINT64_C(0x7ff0000080000001); - static_assert(sizeof(double) == sizeof(uint64_t), "MemPool: sizeof double != sizeof uint64_t"); - for (size_t i = 0; i < nelems; ++i) { - std::memcpy(p++, &snan, sizeof(double)); - } -#endif - -#else - -#ifdef UINT32_MAX - const uint32_t snan = UINT32_C(0x7fa00000); - static_assert(sizeof(float) == sizeof(uint32_t), "MemPool: sizeof float != sizeof uint32_t"); - for (size_t i = 0; i < nelems; ++i) { - std::memcpy(p++, &snan, sizeof(float)); - } -#endif - -#endif + amrex::fill_snan(p, nelems); } + } diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index d4b3a3271ca..228070a13c9 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -398,6 +398,20 @@ namespace amrex * \param stddev standard deviation of normal distribution */ void FillRandomNormal (MultiFab& mf, int scomp, int ncomp, Real mean, Real stddev); + + /** + * \brief Convexify AMR data + * + * This function "convexifies" the AMR data by removing cells that are + * covered by fine levels from coarse level MultiFabs. This could be + * useful for visualization. The returned MultiFabs on coarse levels + * have different BoxArrays from the original BoxArrays. For the finest + * level, the data is simply copied to the returned object. The returned + * MultiFabs have no ghost cells. For nodal data, the nodes on the + * coarse/fine interface exist on both levels. + */ + [[nodiscard]] Vector convexify (Vector const& mf, + Vector const& refinement_ratio); } namespace amrex { diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index a2a1f14ed8d..86a1e290548 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -1216,4 +1216,57 @@ namespace amrex FillRandomNormal(p, npts, mean, stddev); } } + + Vector convexify (Vector const& mf, + Vector const& refinement_ratio) + { + if (mf.empty()) { return Vector{}; } + + const auto nlevels = int(mf.size()); + Vector rmf(nlevels); + + const int ncomp = mf[nlevels-1]->nComp(); + rmf[nlevels-1].define(mf[nlevels-1]->boxArray(), + mf[nlevels-1]->DistributionMap(), ncomp, 0); + MultiFab::Copy(rmf[nlevels-1], *mf[nlevels-1], 0, 0, ncomp, 0); + + for (int ilev = nlevels-2; ilev >= 0; --ilev) { + BoxArray fba = mf[ilev+1]->boxArray(); + BoxArray cba = mf[ilev ]->boxArray(); + AMREX_ASSERT(fba.ixType() == cba.ixType()); + AMREX_ASSERT(mf[ilev]->nComp() == ncomp); + + fba.convert(IntVect(0)).coarsen(refinement_ratio[ilev]); + cba.convert(IntVect(0)); + auto const& cdm = mf[ilev]->DistributionMap(); + + BoxList blnew, bltmp; + Vector procmap; + Vector localmap; + for (int ibox = 0; ibox < int(cba.size()); ++ibox) { + fba.complementIn(bltmp, cba[ibox]); + blnew.join(bltmp); + procmap.resize(procmap.size()+bltmp.size(), cdm[ibox]); + if (ParallelDescriptor::MyProc() == cdm[ibox]) { + localmap.resize(localmap.size()+bltmp.size(), ibox); + } + } + + if (blnew.isNotEmpty()) { + BoxArray banew(std::move(blnew)); + banew.convert(mf[ilev]->ixType()); + DistributionMapping dmnew(std::move(procmap)); + rmf[ilev].define(banew, dmnew, ncomp, 0); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(rmf[ilev], 
TilingIfNotGPU()); mfi.isValid(); ++mfi) { + rmf[ilev][mfi].template copy + ((*mf[ilev])[localmap[mfi.LocalIndex()]], mfi.tilebox()); + } + } + } + + return rmf; + } } diff --git a/Src/Base/AMReX_OpenMP.cpp b/Src/Base/AMReX_OpenMP.cpp index 53782c8c347..56225253fa7 100644 --- a/Src/Base/AMReX_OpenMP.cpp +++ b/Src/Base/AMReX_OpenMP.cpp @@ -175,7 +175,7 @@ namespace amrex::OpenMP } } else { std::optional num_omp_threads = to_int(omp_threads); - if (num_omp_threads.has_value()) { + if (num_omp_threads.has_value() && num_omp_threads.value() > 0) { omp_set_num_threads(num_omp_threads.value()); } else { diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H index 0e7458fe5b0..464bb552ee1 100644 --- a/Src/Base/AMReX_PODVector.H +++ b/Src/Base/AMReX_PODVector.H @@ -2,10 +2,12 @@ #define AMREX_PODVECTOR_H_ #include +#include #include #include #include #include +#include #include #include @@ -196,6 +198,33 @@ namespace amrex #endif std::memmove(dst, src, count); } + + template class Allocator> + void maybe_init_snan (T* data, Size count, Allocator const& allocator) + { + amrex::ignore_unused(data, count, allocator); + if constexpr (std::is_same_v> || + std::is_same_v>) { + if (amrex::InitSNaN()) { +#ifdef AMREX_USE_GPU + if constexpr (RunOnGpu>::value) { + amrex::fill_snan(data, count); + Gpu::streamSynchronize(); + return; + } else if constexpr (IsPolymorphicArenaAllocator>::value) { + if (allocator.arena()->isManaged() || + allocator.arena()->isDevice()) + { + amrex::fill_snan(data, count); + Gpu::streamSynchronize(); + return; + } + } +#endif + amrex::fill_snan(data, count); + } + } + } } namespace VectorGrowthStrategy @@ -254,6 +283,7 @@ namespace amrex { if (a_size != 0) { m_data = allocate(m_size); + detail::maybe_init_snan(m_data, m_size, (Allocator const&)(*this)); } } @@ -366,7 +396,7 @@ namespace amrex else { // if the allocators are not the same we give up and copy - *this = a_vector; + *this = a_vector; // must copy instead of move } return *this; @@ -594,16 +624,18 @@ namespace amrex void resize (size_type a_new_size) { - if (m_capacity < a_new_size) { - reserve(a_new_size); + auto old_size = m_size; + resize_without_init_snan(a_new_size); + if (old_size < a_new_size) { + detail::maybe_init_snan(m_data + old_size, + m_size - old_size, (Allocator const&)(*this)); } - m_size = a_new_size; } void resize (size_type a_new_size, const T& a_val) { size_type old_size = m_size; - resize(a_new_size); + resize_without_init_snan(a_new_size); if (old_size < a_new_size) { detail::uninitializedFillNImpl(m_data + old_size, @@ -738,6 +770,14 @@ namespace amrex m_size = new_size; m_capacity = new_capacity; } + + void resize_without_init_snan (size_type a_new_size) + { + if (m_capacity < a_new_size) { + reserve(a_new_size); + } + m_size = a_new_size; + } }; } diff --git a/Src/Base/AMReX_ParallelDescriptor.H b/Src/Base/AMReX_ParallelDescriptor.H index f949ae6f45c..a41d393833d 100644 --- a/Src/Base/AMReX_ParallelDescriptor.H +++ b/Src/Base/AMReX_ParallelDescriptor.H @@ -107,7 +107,7 @@ while ( false ) void Initialize (); void Finalize (); - extern AMREX_EXPORT int use_gpu_aware_mpi; + extern AMREX_EXPORT bool use_gpu_aware_mpi; inline bool UseGpuAwareMpi () { return use_gpu_aware_mpi; } //! 
Split the process pool into teams diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp b/Src/Base/AMReX_ParallelDescriptor.cpp index 60f08d4ca35..f6ac26e7984 100644 --- a/Src/Base/AMReX_ParallelDescriptor.cpp +++ b/Src/Base/AMReX_ParallelDescriptor.cpp @@ -57,9 +57,9 @@ namespace amrex::ParallelDescriptor { #endif #ifdef AMREX_USE_GPU - int use_gpu_aware_mpi = false; + bool use_gpu_aware_mpi = false; #else - int use_gpu_aware_mpi = false; + bool use_gpu_aware_mpi = false; #endif ProcessTeam m_Team; @@ -1537,9 +1537,9 @@ StartTeams () int do_team_reduce = 0; #if defined(BL_USE_MPI3) - ParmParse pp("team"); - pp.queryAdd("size", team_size); - pp.queryAdd("reduce", do_team_reduce); + ParmParse pp("amrex.team"); + pp.query("size", team_size); + pp.query("reduce", do_team_reduce); if (nprocs % team_size != 0) { amrex::Abort("Number of processes not divisible by team size"); } diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index 11524eca689..cc9588793da 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -1,25 +1,31 @@ - -#ifndef BL_PARMPARSE_H -#define BL_PARMPARSE_H +#ifndef AMREX_PARMPARSE_H_ +#define AMREX_PARMPARSE_H_ #include #include +#include +#include +#include +#include #include +#include +#include +#include #include -#include #include -#include +#include #include -#include -#include namespace amrex { -class Box; +template +class BoxND; +using Box = BoxND; template class IntVectND; using IntVect = IntVectND; +class RealVect; // // ParmParse class implements a simple database for the storage and @@ -268,28 +274,31 @@ using IntVect = IntVectND; * t = 1.5 * #endif * +* Math expression is supported for integers and reals. For example +* +* n_cell = 128 +* amrex.n_cell = n_cell*2 8 16**2 +* +* becomes +* +* n_cell = 128 +* amrex.n_cell = 256 8 256 +* +* More details can be found at https://amrex-codes.github.io/amrex/docs_html/Basics.html#parmparse */ class ParmParse { public: - class Frame; - class Record; - enum { LAST = -1, FIRST = 0, ALL = -1 }; /** * \brief Construct an additional ParmParse object sharing the same - * internal table as any other such objects in existence. If - * prefix is specified, load this string as the code prefix - * for this particular ParmParse object. + * internal table as any other such objects in existence. If prefix is + * specified, load this string as the code prefix for this particular + * ParmParse object. If parser_prefix is specified, it will be used as + * prefixed in math expression evaluations. */ - explicit ParmParse (const std::string& prefix = std::string()); - - ParmParse (ParmParse const& rhs) = default; - ~ParmParse () = default; - - ParmParse (ParmParse && rhs) = delete; - ParmParse& operator= (const ParmParse& rhs) = delete; - ParmParse& operator= (ParmParse&& rhs) = delete; + explicit ParmParse (std::string prefix = std::string(), + std::string parser_prefix = std::string()); //! Returns true if name is in table. [[nodiscard]] bool contains (const char* name) const; @@ -305,15 +314,6 @@ public: */ [[nodiscard]] int countname (const std::string& name) const; /** - * \brief Returns the number of records of the given name (prepended with prefix) appears - * in the table - */ - [[nodiscard]] int countRecords (const std::string& name) const; - //! Returns the nth record of the given name, or zero if none found. - [[nodiscard]] Record getRecord (const std::string& name, int n = LAST) const; - //! Write the contents of the table in ASCII to the ostream. 
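
A minimal sketch of the math-expression support documented above, reusing the `n_cell` example from that comment (assumes AMReX has been initialized and the inputs file has been read):

    #include <AMReX_ParmParse.H>
    #include <vector>

    // inputs:
    //   n_cell = 128
    //   amrex.n_cell = n_cell*2 8 16**2
    void read_n_cell ()
    {
        amrex::ParmParse pp("amrex");
        // An optional second argument, e.g. ParmParse pp("amrex", "amrex"), sets the
        // prefix tried when resolving symbols inside expressions.
        std::vector<int> n_cell;
        pp.queryarr("n_cell", n_cell);  // -> {256, 8, 256}; "n_cell" inside the
                                        //    expression is resolved from the table
    }
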
- static void dumpTable (std::ostream& os, bool prettyPrint = false); - /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to a bool and stored * in reference ref. If the kth occurrence does not exist or @@ -345,7 +345,7 @@ public: int ival = FIRST) const; //! Same as querykth() but searches for the last occurrence of name. int query (const char* name, - bool& ref, + bool& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. void add (const char* name, bool val); @@ -383,8 +383,7 @@ public: int& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - int val); + void add (const char* name, int val); /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to an int and stored @@ -418,8 +417,7 @@ public: long& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - long val); + void add (const char* name, long val); /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to an int and stored @@ -453,8 +451,7 @@ public: long long& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - long long val); + void add (const char* name, long long val); /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to a float and stored @@ -488,8 +485,7 @@ public: float& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - float val); + void add (const char* name, float val); /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to a double and stored @@ -523,8 +519,7 @@ public: double& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - double val); + void add (const char* name, double val); /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to a std::string and stored @@ -534,15 +529,15 @@ public: * is output and the program halts. Note that ival = 0 is the * first value in the list. */ - void getkth (const char* name, - int k, - std::string& ref, - int ival = FIRST) const; + void getkth (const char* name, + int k, + std::string& ref, + int ival = FIRST) const; //! Same as getkth() but searches for the last occurrence of name. - void get (const char* name, - std::string& ref, - int ival = FIRST) const; + void get (const char* name, + std::string& ref, + int ival = FIRST) const; /** * \brief Similar to getkth() but returns 0 if there is no kth occurrence * of name. If successful, it returns 1 and stores the value in @@ -550,23 +545,16 @@ public: * occurrence does not, or if there is a type mismatch, then the * program signals an error and halts. */ - int querykth (const char* name, - int k, - std::string& ref, - int ival = FIRST) const; + int querykth (const char* name, + int k, + std::string& ref, + int ival = FIRST) const; //! Same as querykth() but searches for the last occurrence of name. 
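
For orientation, a short sketch of the scalar get/query accessors declared here (the `prob` prefix and key names are invented for the example):

    amrex::ParmParse pp("prob");
    amrex::Real cfl = 0.7;          // keeps its default if prob.cfl is absent
    pp.query("cfl", cfl);           // optional: returns 0 or 1, never aborts
    int max_step;
    pp.get("max_step", max_step);   // required: aborts if prob.max_step is missing
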
- int query (const char* name, - std::string& ref, - int ival = FIRST) const; + int query (const char* name, + std::string& ref, + int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - const std::string& val); - - //! keyword for files to load - static std::string const FileKeyword; - - //! Add keys and values from a file to the end of the PP table. - static void addfile (std::string const& filename); + void add (const char* name, const std::string& val); /** * \brief Get the ival'th value of kth occurrence of the requested name. @@ -594,15 +582,14 @@ public: */ int querykth (const char* name, int k, - IntVect& ref, + IntVect& ref, int ival = FIRST) const; //! Same as querykth() but searches for the last occurrence of name. int query (const char* name, IntVect& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - const IntVect& val); + void add (const char* name, const IntVect& val); /** * \brief Get the ival'th value of kth occurrence of the requested name. * If successful, the value is converted to a Box and stored @@ -636,8 +623,7 @@ public: Box& ref, int ival = FIRST) const; //! Add a key 'name'with value 'ref' to the end of the PP table. - void add (const char* name, - const Box& val); + void add (const char* name, const Box& val); /** * \brief Gets an std::vector\ of num_val values from kth occurrence of * given name. If successful, the values are converted to an int @@ -650,27 +636,27 @@ public: * converted to an int, an error message is reported and the * program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name. - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& ref); @@ -686,30 +672,29 @@ public: * converted to a long, an error message is reported and the * program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). 
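
The array accessors work the same way; a sketch with invented key names:

    amrex::ParmParse pp("prob");
    std::vector<amrex::Real> prob_lo;
    pp.getarr("prob_lo", prob_lo);        // all values of prob.prob_lo; aborts if absent
    std::vector<int> n_cell{64, 64, 64};
    pp.queryarr("n_cell", n_cell);        // optional: the default is kept on a miss
    pp.addarr("n_cell_used", n_cell);     // appends a new entry to the table
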
- int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name. - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. - void addarr (const char* name, - const std::vector& ref); + void addarr (const char* name, const std::vector& ref); /** * \brief Gets an std::vector\ of num_val values from kth occurrence of @@ -723,27 +708,27 @@ public: * converted to a long long, an error message is reported and the * program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name. - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& ref); @@ -759,27 +744,27 @@ public: * values cannot be converted to a float, an error message is * reported and the program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name. - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& ref); /** @@ -794,27 +779,27 @@ public: * values cannot be converted to a double, an error message is * reported and the program halts. 
*/ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name. - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& ref); /** @@ -829,27 +814,27 @@ public: * values cannot be converted to an std::string, an error message is * reported and the program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name.2 - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& ref); /** @@ -864,27 +849,27 @@ public: * values cannot be converted to an IntVect, an error message is * reported and the program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! 
Same as queryktharr() but searches for last occurrence of name.2 - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& ref); /** @@ -899,30 +884,52 @@ public: * values cannot be converted to an Box, an error message is * reported and the program halts. */ - void getktharr (const char* name, - int k, + void getktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as getktharr() but searches for last occurrence of name. - void getarr (const char* name, + void getarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! queryktharr() is to querykth() as getktharr() is to getkth(). - int queryktharr (const char* name, - int k, + int queryktharr (const char* name, + int k, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Same as queryktharr() but searches for last occurrence of name.2 - int queryarr (const char* name, + int queryarr (const char* name, std::vector& ref, - int start_ix = FIRST, - int num_val = ALL) const; + int start_ix = FIRST, + int num_val = ALL) const; //! Add a key 'name' with vector of values 'ref' to the end of the PP table. void addarr (const char* name, const std::vector& refd); + /* + * \brief Query IntVect from array + * + * This reads IntVect from an array (e.g., `8 16 8`), not the format + * using parentheses (e.g., `(8,16,8)`). + */ + int queryarr (const char* name, IntVect& ref) const; + + /* + * \brief Get IntVect from array + * + * This reads IntVect from an array (e.g., `8 16 8`), not the format + * using parentheses (e.g., `(8,16,8)`). + */ + void getarr (const char* name, IntVect& ref) const; + + //! Query RealVect from array + int queryarr (const char* name, RealVect& ref) const; + + //! Get RealVect from array + void getarr (const char* name, RealVect& ref) const; + template void get (const char* name, std::array& ref) const { std::vector v; @@ -1031,9 +1038,297 @@ public: return exist; } + /** + * \brief Query with Parser. If `name` is found, this uses amrex::Parser + * to parse the entire list of empty space separated values as a single + * scalar. The return value indicates whether it's found. Note that + * queryWithParser will be used recursively for unresolved symbols. + */ + int queryWithParser (const char* name, int& ref) const; + int queryWithParser (const char* name, long& ref) const; + int queryWithParser (const char* name, long long& ref) const; + int queryWithParser (const char* name, float& ref) const; + int queryWithParser (const char* name, double& ref) const; + + /** + * \brief Query with Parser. The return value indicates whether it's + * found. Note that queryWithParser will be used for unresolved + * symbols. If the number of elements in the input does not equal to + * `nvals`, it's a runtime error. 
+ */ + int queryarrWithParser (const char* name, int nvals, int* ref) const; + int queryarrWithParser (const char* name, int nvals, long* ref) const; + int queryarrWithParser (const char* name, int nvals, long long* ref) const; + int queryarrWithParser (const char* name, int nvals, float* ref) const; + int queryarrWithParser (const char* name, int nvals, double* ref) const; + template || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v,int> = 0> + int queryarrWithParser (const char* name, int nvals, std::vector& ref) const + { + if (this->contains(name)) { + if (int(ref.size()) < nvals) { ref.resize(nvals); } + return this->queryarrWithParser(name, nvals, ref.data()); + } else { + return 0; + } + } + + /** + * \brief Query with Parser. If `name` is found, this uses amrex::Parser + * to parse the entire list of empty space separated values as a single + * scalar. If not, the value in `ref` will be added to the ParmParse + * database. The return value indicates whether it's found. + */ + template || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v,int> = 0> + int queryAddWithParser (const char* name, T& ref) const + { + int exist = this->queryWithParser(name, ref); + if (!exist) { + this->add(name, ref); + } + return exist; + } + + /** + * \brief Get with Parser. If `name` is found, this uses amrex::Parser + * to parse the entire list of empty space separated values as a single + * scalar. If not, it's a runtime error. + */ + template || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v,int> = 0> + void getWithParser (const char* name, T& ref) const + { + int exist = this->queryWithParser(name, ref); + if (!exist) { + amrex::Error(std::string("ParmParse::getWithParser: failed to get ")+name); + } + } + + /** + * \brief Get with Parser. If `name` is not found, it's a runtime + * error. If the number of elements does not equal to `nvals`, it's also + * a runtime error. + */ + template || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v,int> = 0> + void getarrWithParser (const char* name, int nvals, T* ref) const + { + int exist = this->queryarrWithParser(name, nvals, ref); + if (!exist) { + amrex::Error(std::string("ParmParse::getarrWithParser: failed to get ")+name); + } + } + + /** + * \brief Get with Parser. If `name` is not found, it's a runtime + * error. If the number of elements does not equal to `nvals`, it's also + * a runtime error. + */ + template || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v,int> = 0> + void getarrWithParser (const char* name, int nvals, std::vector& ref) const + { + int exist = this->queryarrWithParser(name, nvals, ref); + if (!exist) { + amrex::Error(std::string("ParmParse::getarrWithParser: failed to get ")+name); + } + } + + /* + * \brief Query two names. + * + * This function queries with `new_name` first. If it's not found, it + * will try again with `old_name`. + */ + template + int query (const char* new_name, const char* old_name, T& ref) + { + return (this->query(new_name, ref) || + this->query(old_name, ref)); + } + + /** + * \brief Get using two names. + * + * This function queries with `new_name` first, If it's not found, it + * will try again with `old_name`. It's an error if neither name is + * found. 
+ */ + template + void get (const char* new_name, const char* old_name, T& ref) + { + auto exist = this->query(new_name, old_name, ref); + if (!exist) { + amrex::ErrorStream() << "ParmParse::get failed to find " + << new_name << " and " << old_name << '\n'; + ParmParse::dumpTable(amrex::ErrorStream()); + amrex::Abort(); + } + } + + /** + * \brief. Query enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. The return value + * indicates if `name` is found. An exception is thrown, if the found + * string associated with the name cannot be converted to an enumerator + * (i.e., the string does not match any names in the definition of T). + */ + template , + std::enable_if_t = 0> + int query (const char* name, T& ref) + { + std::string s; + int exist = this->query(name, s); + if (exist) { + try { + ref = amrex::getEnum(s); + } catch (...) { + throw; + } + } + return exist; + } + + /** + * \brief. Get enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. It's a runtime error, + * if `name` is not found. An exception is thrown, if the found string + * associated with the name cannot be converted to an enumerator (i.e., + * the string does not match any names in the definition of T). + */ + template , + std::enable_if_t = 0> + void get (const char* name, T& ref) + { + std::string s; + this->get(name, s); + try { + ref = amrex::getEnum(s); + } catch (...) { + throw; + } + } + + //! Query an array of enum values using given name. + template , + std::enable_if_t = 0> + int queryarr (const char* name, std::vector& ref) + { + std::vector s; + int exist = this->queryarr(name, s); + if (exist) { + ref.resize(s.size()); + for (std::size_t i = 0; i < s.size(); ++i) { + ref[i] = amrex::getEnum(s[i]); + } + } + return exist; + } + + //! Get an array of enum values using given name. + template , + std::enable_if_t = 0> + void getarr (const char* name, std::vector& ref) + { + std::vector s; + this->getarr(name, s); + ref.resize(s.size()); + for (std::size_t i = 0; i < s.size(); ++i) { + ref[i] = amrex::getEnum(s[i]); + } + } + + /** + * \brief. Query enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. The return value + * indicates if `name` is found. An exception is thrown, if the found + * string associated with the name cannot be case-insensitively + * converted to an enumerator (i.e., the found string, not `name`, does + * not case-insensitively match any names in the definition of T). If + * there are multiple matches, the first one is used. + */ + template , + std::enable_if_t = 0> + int query_enum_case_insensitive (const char* name, T& ref) + { + std::string s; + int exist = this->query(name, s); + if (exist) { + s = amrex::toLower(s); + auto const& enum_names = amrex::getEnumNameStrings(); + auto found = std::find_if(enum_names.begin(), enum_names.end(), + [&] (std::string const& ename) { + return amrex::toLower(ename) == s; + }); + if (found != enum_names.end()) { + ref = static_cast(std::distance(enum_names.begin(), found)); + } else { + std::string msg("query_enum_case_insensitive(\""); + msg.append(name).append("\",").append(amrex::getEnumClassName()) + .append("&) failed."); + throw std::runtime_error(msg); + } + } + return exist; + } + + /** + * \brief. Get enum value using given name. + * + * Here T is an enum class defined by AMREX_ENUM. It's a runtime error, + * if `name` is not found. 
An exception is thrown, if the found string + * associated with the name cannot be case-insensitively converted to an + * enumerator (i.e., the found string, not `name`, does not + * case-insensitively match any names in the definition of T). If there + * are multiple matches, the first one is used. + */ + template , + std::enable_if_t = 0> + void get_enum_case_insensitive (const char* name, T& ref) + { + int exist = this->query_enum_case_insensitive(name, ref); + if (!exist) { + std::string msg("get_enum_case_insensitive(\""); + msg.append(name).append("\",").append(amrex::getEnumClassName()) + .append("&) failed."); + amrex::Abort(msg); + } + } + //! Remove given name from the table. int remove (const char* name); + //! Make Parser using given string `func` as function body and `vars` as + //! variable names. Constants known to ParmParse will be set. It's a + //! runtime error, if there are unknown symbols in `func`. + [[nodiscard]] Parser makeParser (std::string const& func, + Vector const& vars) const; + + //! Make IParser using given string `func` as function body and `vars` + //! as variable names. Constants known to ParmParse will be set. It's a + //! runtime error, if there are unknown symbols in `func`. + [[nodiscard]] IParser makeIParser (std::string const& func, + Vector const& vars) const; + /** * \brief Construct an initial ParmParse object from the argc and argv * passed in to main(). An error will be signalled if another @@ -1041,14 +1336,28 @@ public: * read the parameters in from that file first and then append * those derived from argv to the table. */ - static void Initialize(int argc, - char** argv, - const char* parfile); + static void Initialize (int argc, char** argv, const char* parfile); /** * \brief The destructor. The internal static table will only be deleted * if there are no other ParmParse objects in existence. */ - static void Finalize(); + static void Finalize (); + + //! Set prefix used by math expression Parser + static void SetParserPrefix (std::string a_prefix); + + static int Verbose (); + static void SetVerbose (int v); + + //! Write the contents of the table in ASCII to the ostream. + static void dumpTable (std::ostream& os, bool prettyPrint = false); + + //! Write the table in a pretty way to the ostream. If there are + //! duplicates, only the last one is printed. + static void prettyPrintTable (std::ostream& os); + + //! Add keys and values from a file to the end of the PP table. + static void addfile (std::string const& filename); static bool QueryUnusedInputs (); @@ -1061,79 +1370,32 @@ public: //! Returns [prefix.]* parameters. [[nodiscard]] static std::set getEntries (const std::string& prefix = std::string()); - struct PP_entry; - using Table = std::list; - static void appendTable(ParmParse::Table& tab); - [[nodiscard]] const Table& table() const {return *m_table;} - -protected: + struct PP_entry { + // There can be multiple occurrences for a given name (e.g., + // multiple lines starting with `foo =` in inputs. For each + // occurrence, there can be multiple values. Thus, the use of + // vector>. + std::vector> m_vals; + mutable Long m_count = 0; + }; + using Table = std::unordered_map; - friend class Frame; - friend class Record; + [[nodiscard]] const Table& table() const {return *m_table;} - explicit ParmParse (Table& a_table); - // - //! Set/Get the prefix. 
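
A sketch of the enum accessors documented above; it assumes an enum class declared with AMREX_ENUM (the type, enumerators, and key name are invented):

    // Assumed declaration at namespace scope:
    //   AMREX_ENUM(Scheme, godunov, mol, weno);
    amrex::ParmParse pp("adv");
    Scheme scheme = Scheme::godunov;
    pp.query("scheme", scheme);                        // exact match: "godunov", "mol" or "weno"
    pp.query_enum_case_insensitive("scheme", scheme);  // also accepts "Godunov", "MOL", ...
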
- [[nodiscard]] std::string getPrefix() const; - std::string setPrefix(const std::string& str); - void pushPrefix(const std::string& str); - void popPrefix(); - [[nodiscard]] std::string prefixedName (const std::string& str) const; - // - //! Prefix used in keyword search. - std::stack m_pstack; - Table* m_table; -}; + //! keyword for files to load + static std::string const FileKeyword; -struct ParmParse::PP_entry -{ - PP_entry (std::string name, const std::list& vals); - PP_entry (std::string name, const std::string& vals); - PP_entry (std::string name, const std::list& table); - PP_entry (const PP_entry& pe); - PP_entry& operator= (const PP_entry& pe); - PP_entry (PP_entry&&) = delete; - PP_entry& operator= (PP_entry&&) = delete; - ~PP_entry (); - [[nodiscard]] std::string print() const; - - std::string m_name; - std::vector m_vals; - Table* m_table; - mutable bool m_queried; -}; + static std::string ParserPrefix; +protected: -class ParmParse::Frame -{ -public: - Frame (ParmParse& pp, const std::string& pfix); - ~Frame (); - Frame (Frame const&) = default; - Frame (Frame&&) = delete; - Frame& operator= (Frame const&) = delete; - Frame& operator= (Frame &&) = delete; - void push(const std::string& str); - void pop(); - [[nodiscard]] std::string getPrefix() const; -private: - ParmParse* m_pp; - int m_np{0}; -}; + [[nodiscard]] std::string prefixedName (const std::string_view& str) const; -class ParmParse::Record -{ -public: - [[nodiscard]] const ParmParse* operator->() const; - [[nodiscard]] const ParmParse& operator* () const; -private: - friend class ParmParse; - explicit Record (const ParmParse& pp); - ParmParse m_pp; + std::string m_prefix; // Prefix used in keyword search + std::string m_parser_prefix; // Prefix used by Parser + Table* m_table; }; -std::ostream& operator<< (std::ostream& os, const ParmParse::PP_entry& pp); - } -#endif /*BL_PARMPARSE_H*/ +#endif /* AMREX_PARMPARSE_H_ */ diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp index a2b42b64a15..df1e18e9b08 100644 --- a/Src/Base/AMReX_ParmParse.cpp +++ b/Src/Base/AMReX_ParmParse.cpp @@ -1,28 +1,23 @@ -#include #include -#include +#include #include -#include -#include +#include +#include #include +#include +#include #include #include -#include -#include -#include -#include #include -#include -#include +#include #include +#include #include -#include -#include #include -#include +#include #include -#include +#include extern "C" void amrex_init_namelist (const char*); extern "C" void amrex_finalize_namelist (); @@ -30,117 +25,55 @@ extern "C" void amrex_finalize_namelist (); namespace amrex { namespace { -#ifdef AMREX_XSDK - bool finalize_verbose = false; -#else - bool finalize_verbose = true; -#endif + bool initialized = false; + ParmParse::Table g_table; + std::vector> g_parser_recursive_symbols; + namespace pp_detail { + int verbose = -1; + } } std::string const ParmParse::FileKeyword = "FILE"; +std::string ParmParse::ParserPrefix; -// -// Used by constructor to build table. 
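
A sketch of makeParser, declared above in AMReX_ParmParse.H (the function string and the constants `a` and `b` are invented; the constants must be resolvable from the ParmParse table):

    // inputs:
    //   a = 2.0
    //   b = 0.5
    amrex::ParmParse pp;
    auto parser = pp.makeParser("a*x + b*sin(x)", {"x"});
    auto f = parser.compileHost<1>();   // host executor taking the one registered variable
    double y = f(1.5);                  // 2.0*1.5 + 0.5*sin(1.5)
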
-// -ParmParse::PP_entry::PP_entry (std::string name, const std::list& vals) - : - m_name(std::move(name)), - m_vals(vals.begin(), vals.end()), - m_table(nullptr), - m_queried(false) -{ -} - -ParmParse::PP_entry::PP_entry (std::string name, const std::string& val) - : - m_name(std::move(name)), - m_vals({val}), - m_table(nullptr), - m_queried(false) -{ -} +ParmParse::ParmParse (std::string prefix, std::string parser_prefix) + : m_prefix(std::move(prefix)), + m_parser_prefix(std::move(parser_prefix)), + m_table(&g_table) +{} -ParmParse::PP_entry::PP_entry (std::string name, const std::list& table) - : - m_name(std::move(name)), - m_table(new Table(table)), - m_queried(false) -{ -} - -ParmParse::PP_entry::PP_entry (const PP_entry& pe) - : m_name(pe.m_name), - m_vals(pe.m_vals), - m_table(nullptr), - m_queried(pe.m_queried) -{ - if ( pe.m_table ) - { - m_table = new Table(*pe.m_table); - } -} - -ParmParse::PP_entry::~PP_entry () +namespace { - delete m_table; -} -ParmParse::PP_entry& -ParmParse::PP_entry::operator= (const PP_entry& pe) +std::string pp_to_pretty_string (std::string const& name, + std::vector const& vals) { - if ( &pe == this ) { return *this; } - m_name = pe.m_name; - m_vals = pe.m_vals; - m_table = nullptr; - m_queried = pe.m_queried; - if ( pe.m_table ) - { - m_table = new Table(*pe.m_table); + std::stringstream ss; + ss << name << " ="; + for (auto const& v : vals) { + ss << " " << v; } - return *this; + return ss.str(); } -std::string -ParmParse::PP_entry::print () const { - std::stringstream t; - t << m_name << " = "; - int n = static_cast(m_vals.size()); - for ( int i = 0; i < n; i++) - { - t << m_vals[i]; - if ( i < n-1 ) { t << " "; } - } - return t.str(); -} - -std::ostream& -operator<< (std::ostream& os, const ParmParse::PP_entry& pp) +std::string pp_to_string (std::string const& name, + std::vector const& vals) { - os << pp.m_name << "(nvals = " << pp.m_vals.size() << ") " << " :: ["; - int n = static_cast(pp.m_vals.size()); - for ( int i = 0; i < n; i++ ) - { - os << pp.m_vals[i]; - if ( i < n-1 ) { os << ", "; } + std::stringstream ss; + ss << name << "(nvals = " << vals.size() << ") " << " :: ["; + for (std::size_t i = 0; i < vals.size(); ++i) { + ss << vals[i]; + if ( i < vals.size()-1 ) { ss << ", "; } } - os << "]"; - - if ( !os ) - { - amrex::Error("write on ostream failed"); - } - return os; + ss << "]"; + return ss.str(); } -namespace -{ enum PType { pDefn, - pValue, pEQ_sign, - pOpenBracket, - pCloseBracket, + pValue, pEOF }; @@ -208,12 +141,13 @@ template <> bool is (const std::string& str, bool& val) { - if ( str == "true" || str == "t" ) + auto const lo_str = amrex::toLower(str); + if ( lo_str == "true" || lo_str == "t" ) { val = true; return true; } - if ( str == "false" || str == "f" ) + if ( lo_str == "false" || lo_str == "f" ) { val = false; return true; @@ -233,8 +167,6 @@ is (const std::string& str, bool& val) return false; } -ParmParse::Table g_table; - template const char* tok_name(const T&) { return typeid(T).name(); } template const char* tok_name(std::vector&) { return tok_name(T());} @@ -348,16 +280,6 @@ getToken (const char*& str, std::string& ostr, int& num_linefeeds) ostr += ch; str++; pcnt = 1; state = LIST; } - else if ( ch == '{' ) - { - str++; - return pOpenBracket; - } - else if ( ch == '}' ) - { - str++; - return pCloseBracket; - } else if ( std::isalpha(ch) ) { ostr += ch; str++; @@ -433,107 +355,52 @@ getToken (const char*& str, std::string& ostr, int& num_linefeeds) } } - -// -// Keyword aware string comparison. 
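
A small illustration of the boolean parsing above, which now lower-cases the value before comparing (key name invented):

    // inputs:
    //   prob.do_plot = TRUE      (now equivalent to true, True, T, t)
    amrex::ParmParse pp("prob");
    bool do_plot = false;
    pp.query("do_plot", do_plot);   // do_plot == true
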
-// - - -bool -ppfound (const std::string& keyword, - const ParmParse::PP_entry& pe, - bool recordQ) -{ - return (recordQ == (pe.m_table!=nullptr)) && (keyword == pe.m_name); -} - // // Return the index of the n'th occurrence of a parameter name, // except if n==-1, return the index of the last occurrence. // Return 0 if the specified occurrence does not exist. // -const ParmParse::PP_entry* -ppindex (const ParmParse::Table& table, - int n, - const std::string& name, - bool recordQ) +std::vector const* +ppindex (const ParmParse::Table& table, int n, const std::string& name) { - const ParmParse::PP_entry* fnd = nullptr; + auto found = table.find(name); + if (found == table.cend()) { return nullptr; } - if ( n == ParmParse::LAST ) - { - // - // Search from back of list. - // - for (auto li = table.crbegin(), REnd = table.crend(); li != REnd; ++li) - { - if ( ppfound(name, *li, recordQ) ) - { - fnd = &*li; - break; - } - } - } - else - { - for (auto const& li : table) - { - if ( ppfound(name, li, recordQ) ) - { - fnd = &li; - if ( --n < 0 ) - { - break; - } - } - } - if ( n >= 0) - { - fnd = nullptr; - } - } +#ifdef AMREX_USE_OMP +#pragma omp atomic update +#endif + ++(found->second.m_count); - if ( fnd ) - { - // - // Found an entry; mark all occurrences of name as used. - // - for (const auto& li : table) - { - if ( ppfound(name, li, recordQ) ) - { - li.m_queried = true; - } - } + if (n == ParmParse::LAST) { + return &(found->second.m_vals.back()); + } else { + return &(found->second.m_vals[n]); } - return fnd; } -void -bldTable (const char*& str, std::list& tab); +void bldTable (const char*& str, ParmParse::Table& tab); -namespace { - bool isTrue(std::smatch const& sm) { - const std::string op = sm[1].str(); - const int dim = std::stoi(sm[2].str()); - if (op == "<") { - return AMREX_SPACEDIM < dim; - } else if (op == ">") { - return AMREX_SPACEDIM > dim; - } else if (op == "==") { - return AMREX_SPACEDIM == dim; - } else if (op == "<=") { - return AMREX_SPACEDIM <= dim; - } else if (op == ">=") { - return AMREX_SPACEDIM >= dim; - } else { - return false; - } +bool isTrue(std::smatch const& sm) +{ + const std::string op = sm[1].str(); + const int dim = std::stoi(sm[2].str()); + if (op == "<") { + return AMREX_SPACEDIM < dim; + } else if (op == ">") { + return AMREX_SPACEDIM > dim; + } else if (op == "==") { + return AMREX_SPACEDIM == dim; + } else if (op == "<=") { + return AMREX_SPACEDIM <= dim; + } else if (op == ">=") { + return AMREX_SPACEDIM >= dim; + } else { + return false; } } void -read_file (const char* fname, std::list& tab) +read_file (const char* fname, ParmParse::Table& tab) { // // Space for input file if it exists. @@ -629,9 +496,7 @@ read_file (const char* fname, std::list& tab) } void -addDefn (std::string& def, - std::list& val, - std::list& tab) +addDefn (std::string& def, std::vector& val, ParmParse::Table& tab) { // // Check that defn exists. @@ -662,7 +527,7 @@ addDefn (std::string& def, } else { - tab.emplace_back(def,val); + tab[def].m_vals.push_back(val); } val.clear(); if ( def != ParmParse::FileKeyword ) { @@ -671,54 +536,23 @@ addDefn (std::string& def, } void -addTable (std::string& def, - ParmParse::Table& val, - std::list& tab) -{ - if ( def.empty() ) - { - val.clear(); - return; - } - // - // Check that it has values. 
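
To make the occurrence bookkeeping in ppindex above concrete, a sketch (key name invented):

    amrex::ParmParse pp;
    pp.add("ncell", 32);            // first occurrence
    pp.add("ncell", 64);            // second occurrence of the same key
    int n = 0;
    pp.query("ncell", n);           // n == 64: query() uses the last occurrence
    pp.querykth("ncell", 0, n);     // n == 32: occurrence 0, i.e. the first one
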
- // - if ( val.empty() ) - { - amrex::ErrorStream() << "ParmParse::addTable(): no values for Table " << def << "\n"; - amrex::Abort(); - } - tab.emplace_back(def, val); - val.clear(); - def = std::string(); -} - -void -bldTable (const char*& str, - std::list& tab) +bldTable (const char*& str, ParmParse::Table& tab) { - std::string cur_name; - std::list cur_list; - ParmParse::Table cur_table; - std::string tmp_str; - std::vector cur_linefeeds; + std::string cur_name; + std::vector cur_list; + std::vector cur_linefeeds; for (;;) { std::string tokname; int num_linefeeds; - PType token = getToken(str,tokname, num_linefeeds); + PType token = getToken(str, tokname, num_linefeeds); switch (token) { - case pCloseBracket: - if ( !cur_name.empty() && cur_list.empty() ) - { - amrex::Abort("ParmParse::bldTable() defn with no list"); - } - AMREX_FALLTHROUGH; case pEOF: + { if (std::accumulate(cur_linefeeds.begin(), cur_linefeeds.end(), int(0)) > 0) { std::string error_message("ParmParse: Multiple lines in "); @@ -731,33 +565,9 @@ bldTable (const char*& str, } addDefn(cur_name,cur_list,tab); return; - case pOpenBracket: - if ( cur_name.empty() ) - { - amrex::Abort("ParmParse::bldTabe() '{' with no blocknamne"); - } - if ( !cur_list.empty() ) - { - tmp_str = cur_list.back(); - cur_list.pop_back(); - cur_linefeeds.pop_back(); - if (std::accumulate(cur_linefeeds.begin(), cur_linefeeds.end(), int(0)) > 0) - { - std::string error_message("ParmParse: Multiple lines in "); - error_message.append(cur_name).append(" ="); - for (auto const& x : cur_list) { - error_message.append(" ").append(x); - } - error_message.append(". Must use \\ for line continuation."); - amrex::Abort(error_message); - } - addDefn(cur_name, cur_list, tab); - cur_name = tmp_str; - } - bldTable(str, cur_table); - addTable(cur_name, cur_table, tab); - break; + } case pEQ_sign: + { if ( cur_name.empty() ) { amrex::Abort("ParmParse::bldTable() EQ with no current defn"); @@ -767,7 +577,7 @@ bldTable (const char*& str, // // Read one too far, remove last name on list. // - tmp_str = cur_list.back(); + auto tmp_str = cur_list.back(); cur_list.pop_back(); cur_linefeeds.pop_back(); if (std::accumulate(cur_linefeeds.begin(), cur_linefeeds.end(), int(0)) > 0) @@ -781,11 +591,13 @@ bldTable (const char*& str, amrex::Abort(error_message); } addDefn(cur_name,cur_list,tab); - cur_name = tmp_str; + cur_name = std::move(tmp_str); } cur_linefeeds.clear(); break; + } case pDefn: + { if ( cur_name.empty() ) { cur_name = tokname; @@ -795,7 +607,9 @@ bldTable (const char*& str, // Otherwise, fall through, this may be a string. // AMREX_FALLTHROUGH; + } case pValue: + { if ( cur_name.empty() ) { std::string msg("ParmParse::bldTable(): value with no defn: "); @@ -806,23 +620,28 @@ bldTable (const char*& str, cur_linefeeds.push_back(num_linefeeds); break; } + } // switch (token) } } -namespace -{ +template +bool pp_parser (const ParmParse::Table& table, const std::string& parser_prefix, + const std::string& name, const std::string& val, T& ref, + bool use_querywithparser); + template bool squeryval (const ParmParse::Table& table, - const std::string& name, - T& ref, - int ival, - int occurrence) + const std::string& parser_prefix, + const std::string& name, + T& ref, + int ival, + int occurrence) { // // Get last occurrence of name in table. 
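
Related to the FILE keyword handled by addDefn above, a sketch of pulling in an extra parameter file programmatically and dumping the merged table (file name invented; addfile requires AMReX to be initialized):

    amrex::ParmParse::addfile("inputs.overrides");          // same effect as `FILE = inputs.overrides`
    amrex::ParmParse::dumpTable(amrex::OutStream(), true);  // pretty-printed dump of the table
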
// - const ParmParse::PP_entry* def = ppindex(table, occurrence, name, false); + auto const* def = ppindex(table, occurrence, name); if ( def == nullptr ) { return false; @@ -830,10 +649,10 @@ squeryval (const ParmParse::Table& table, // // Does it have ival values? // - if ( ival >= static_cast(def->m_vals.size()) ) + if ( ival >= static_cast(def->size()) ) { amrex::ErrorStream() << "ParmParse::queryval no value number" - << ival << " for "; + << ival << " for "; if ( occurrence == ParmParse::LAST ) { amrex::ErrorStream() << "last occurrence of "; @@ -842,17 +661,30 @@ squeryval (const ParmParse::Table& table, { amrex::ErrorStream() << " occurrence " << occurrence << " of "; } - amrex::ErrorStream() << def->m_name << '\n' << *def << '\n'; + amrex::ErrorStream() << name << '\n' << pp_to_string(name,*def) << '\n'; amrex::Abort(); } - const std::string& valname = def->m_vals[ival]; + const std::string& valname = (*def)[ival]; bool ok = is(valname, ref); if ( !ok ) { + if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) + { + if (pp_parser(table, parser_prefix, name, valname, ref, false)) { + return true; + } + } else { + amrex::ignore_unused(parser_prefix); + } + amrex::ErrorStream() << "ParmParse::queryval type mismatch on value number " - << ival << " of " << '\n'; + << ival << " of " << '\n'; if ( occurrence == ParmParse::LAST ) { amrex::ErrorStream() << " last occurrence of "; @@ -861,12 +693,12 @@ squeryval (const ParmParse::Table& table, { amrex::ErrorStream() << " occurrence number " << occurrence << " of "; } - amrex::ErrorStream() << def->m_name << '\n'; + amrex::ErrorStream() << name << '\n'; amrex::ErrorStream() << " Expected an \"" - << tok_name(ref) - << "\" type which can't be parsed from the string \"" - << valname << "\"\n" - << *def << '\n'; + << tok_name(ref) + << "\" type which can't be parsed from the string \"" + << valname << "\"\n" + << pp_to_string(name,*def) << '\n'; amrex::Abort(); } return true; @@ -875,25 +707,26 @@ squeryval (const ParmParse::Table& table, template void sgetval (const ParmParse::Table& table, - const std::string& name, - T& ref, - int ival, - int occurrence) + const std::string& parser_prefix, + const std::string& name, + T& ref, + int ival, + int occurrence) { - if ( squeryval(table, name,ref,ival,occurrence) == 0 ) + if ( squeryval(table, parser_prefix, name,ref,ival,occurrence) == 0 ) { amrex::ErrorStream() << "ParmParse::getval "; if ( occurrence >= 0 ) { amrex::ErrorStream() << "occurrence number " - << occurrence - << " of "; + << occurrence + << " of "; } amrex::ErrorStream() << "ParmParse::getval(): " - << name - << " not found in table" - << '\n'; + << name + << " not found in table" + << '\n'; ParmParse::dumpTable(amrex::ErrorStream()); amrex::Abort(); } @@ -902,16 +735,17 @@ sgetval (const ParmParse::Table& table, template bool squeryarr (const ParmParse::Table& table, - const std::string& name, - std::vector& ref, - int start_ix, - int num_val, - int occurrence) + const std::string& parser_prefix, + const std::string& name, + std::vector& ref, + int start_ix, + int num_val, + int occurrence) { // // Get last occurrence of name in table. 
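
A caller-side sketch of the parser fallback in squeryval above (key names invented; the symbol referenced in the expression must be resolvable from the table):

    // inputs:
    //   nx = 64
    //   my_prob.ny = nx*2
    amrex::ParmParse pp("my_prob");
    int ny = 0;
    pp.query("ny", ny);             // "nx*2" is not an int literal, so it is evaluated
                                    // with the parser: ny == 128
    pp.queryWithParser("ny", ny);   // same value; parses the whole value list as one scalar
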
// - const ParmParse::PP_entry *def = ppindex(table,occurrence, name, false); + auto const* def = ppindex(table,occurrence, name); if ( def == nullptr ) { return false; @@ -922,7 +756,7 @@ squeryarr (const ParmParse::Table& table, // if ( num_val == ParmParse::ALL ) { - num_val = static_cast(def->m_vals.size()); + num_val = static_cast(def->size()); } if ( num_val == 0 ) { return true; } @@ -932,7 +766,7 @@ squeryarr (const ParmParse::Table& table, { ref.resize(stop_ix + 1); } - if ( stop_ix >= static_cast(def->m_vals.size()) ) + if ( stop_ix >= static_cast(def->size()) ) { amrex::ErrorStream() << "ParmParse::queryarr too many values requested for"; if ( occurrence == ParmParse::LAST ) @@ -943,17 +777,30 @@ squeryarr (const ParmParse::Table& table, { amrex::ErrorStream() << " occurrence " << occurrence << " of "; } - amrex::ErrorStream() << def->m_name << '\n' << *def << '\n'; + amrex::ErrorStream() << name << '\n' << pp_to_string(name,*def) << '\n'; amrex::Abort(); } for ( int n = start_ix; n <= stop_ix; n++ ) { - const std::string& valname = def->m_vals[n]; + const std::string& valname = (*def)[n]; bool ok = is(valname, ref[n]); if ( !ok ) { + if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) + { + if (pp_parser(table, parser_prefix, name, valname, ref[n], false)) { + continue; + } + } else { + amrex::ignore_unused(parser_prefix); + } + amrex::ErrorStream() << "ParmParse::queryarr type mismatch on value number " - << n << " of "; + << n << " of "; if ( occurrence == ParmParse::LAST ) { amrex::ErrorStream() << " last occurrence of "; @@ -962,12 +809,12 @@ squeryarr (const ParmParse::Table& table, { amrex::ErrorStream() << " occurrence number " << occurrence << " of "; } - amrex::ErrorStream() << def->m_name << '\n'; + amrex::ErrorStream() << name << '\n'; amrex::ErrorStream() << " Expected an \"" - << tok_name(ref) - << "\" type which can't be parsed from the string \"" - << valname << "\"\n" - << *def << '\n'; + << tok_name(ref) + << "\" type which can't be parsed from the string \"" + << valname << "\"\n" + << pp_to_string(name,*def) << '\n'; amrex::Abort(); } } @@ -976,10 +823,15 @@ squeryarr (const ParmParse::Table& table, template void -sgetarr (const ParmParse::Table& table, const std::string& name, - std::vector& ref, int start_ix, int num_val, int occurrence) +sgetarr (const ParmParse::Table& table, + const std::string& parser_prefix, + const std::string& name, + std::vector& ref, + int start_ix, + int num_val, + int occurrence) { - if ( squeryarr(table,name,ref,start_ix,num_val,occurrence) == 0 ) + if ( squeryarr(table,parser_prefix,name,ref,start_ix,num_val,occurrence) == 0 ) { amrex::ErrorStream() << "ParmParse::sgetarr "; if ( occurrence >= 0 ) @@ -987,9 +839,9 @@ sgetarr (const ParmParse::Table& table, const std::string& name, amrex::ErrorStream() << "occurrence number " << occurrence << " of "; } amrex::ErrorStream() << "ParmParse::sgetarr(): " - << name - << " not found in table" - << '\n'; + << name + << " not found in table" + << '\n'; ParmParse::dumpTable(amrex::ErrorStream()); amrex::Abort(); } @@ -1001,37 +853,30 @@ saddval (const std::string& name, const T& ref) { std::stringstream val; val << std::setprecision(17) << ref; - ParmParse::PP_entry entry(name,val.str()); - entry.m_queried=true; - g_table.push_back(entry); -} + auto& entry = g_table[name]; + entry.m_vals.emplace_back(std::vector{val.str()}); + ++entry.m_count; +} template void saddarr (const std::string& name, const std::vector& ref) { - std::list 
arr; + std::vector arr; + arr.reserve(ref.size()); for (auto const& item : ref) { std::stringstream val; val << std::setprecision(17) << item; arr.push_back(val.str()); } - ParmParse::PP_entry entry(name,arr); - entry.m_queried=true; - g_table.push_back(entry); -} + auto& entry = g_table[name]; + entry.m_vals.emplace_back(std::move(arr)); + ++entry.m_count; } -// // Initialize ParmParse. -// - -namespace { - bool initialized = false; -} - void ppinit (int argc, char** argv, const char* parfile, ParmParse::Table& table) { @@ -1049,202 +894,163 @@ ppinit (int argc, char** argv, const char* parfile, ParmParse::Table& table) argstr += argv[i]; argstr += SPACE; } - std::list arg_table; + ParmParse::Table arg_table; const char* b = argstr.c_str(); bldTable(b, arg_table); // // Append arg_table to end of existing table. // - table.splice(table.end(), arg_table); + for (auto& [name, arg_entry] : arg_table) { + auto& src = arg_entry.m_vals; + auto& dst = table[name].m_vals; + std::move(std::begin(src), std::end(src), std::back_inserter(dst)); + } } initialized = true; } -} // End of unnamed namespace. - -std::string -ParmParse::prefixedName (const std::string& str) const +bool unused_table_entries_q (const ParmParse::Table& table, + const std::string& prefix = std::string()) { - if ( str.empty() ) - { - amrex::Error("ParmParse::prefixedName: has empty name"); - } - if ( !m_pstack.top().empty()) - { - return m_pstack.top() + '.' + str; + if (prefix.empty()) { + return std::any_of(table.begin(), table.end(), + [] (auto const& x) -> bool { + return x.second.m_count == 0; + }); + } else { + auto s = prefix + '.'; + return std::any_of(table.begin(), table.end(), + [&] (auto const& x) -> bool { + return x.second.m_count == 0 + && x.first.substr(0,s.size()) == s; + }); } - return str; } -void -ParmParse::addfile (std::string const& filename) { -#ifdef AMREX_USE_MPI - if (ParallelDescriptor::Communicator() == MPI_COMM_NULL) - { - throw std::runtime_error("ParmParse::addfile: AMReX must be initialized"); +void pp_print_unused (const std::string& pfx, const ParmParse::Table& table) +{ + std::vector sorted_names; + sorted_names.reserve(table.size()); + for (auto const& [name, entry] : table) { + if (entry.m_count == 0) { + sorted_names.push_back(name); + } } -#endif - - auto l = std::list{filename}; - auto file = FileKeyword; - addDefn(file, - l, - g_table); -} + std::sort(sorted_names.begin(), sorted_names.end()); -void -ParmParse::pushPrefix (const std::string& str) -{ - std::string s(str); - if ( !s.empty() ) - { - if ( !m_pstack.top().empty() ) - { - s = m_pstack.top() + "." + s; + for (auto const& name : sorted_names) { + auto const& entry = table.at(name); + for (auto const& vals : entry.m_vals) { + amrex::AllPrint() << pfx << "::" << pp_to_string(name, vals) << '\n'; } - m_pstack.push(s); } } -void -ParmParse::popPrefix () -{ - if ( m_pstack.size() <= 1 ) - { - amrex::Error("ParmParse::popPrefix: stack underflow"); +template +bool squeryWithParser (const ParmParse::Table& table, + const std::string& parser_prefix, + const std::string& name, + T& ref); + +template , + IParser, Parser>> +PARSER_t +pp_make_parser (std::string const& func, Vector const& vars, + ParmParse::Table const& table, std::string const& parser_prefix, + bool use_querywithparser) +{ + using value_t = std::conditional_t, long long, double>; + + std::vector prefixes; + prefixes.reserve(3); + prefixes.emplace_back(); + if (! parser_prefix.empty()) { + prefixes.emplace_back(parser_prefix+"."); + } + if (! 
ParmParse::ParserPrefix.empty()) { + prefixes.emplace_back(ParmParse::ParserPrefix+"."); } - m_pstack.pop(); -} -std::string -ParmParse::getPrefix() const -{ - return m_pstack.top(); -} + PARSER_t parser(func); -ParmParse::ParmParse (const std::string& prefix) - : - m_table(&g_table) -{ - m_pstack.push(prefix); -} + auto symbols = parser.symbols(); + for (auto const& var : vars) { + symbols.erase(var); + } -ParmParse::ParmParse (Table& a_table) - : m_table(&a_table) -{ - m_pstack.emplace(""); -} + for (auto const& s : symbols) { + value_t v = 0; + bool r = false; + for (auto const& pf : prefixes) { + if (use_querywithparser) { + r = squeryWithParser(table, parser_prefix, pf+s, v); + } else { + r = squeryval(table, parser_prefix, pf+s, v, + ParmParse::FIRST, ParmParse::LAST); + } + if (r) { break; } + } + if (r == false) { + amrex::Error("ParmParse: failed to parse " + func); + } + parser.setConstant(s, v); + } + if (!vars.empty()) { + parser.registerVariables(vars); + } -ParmParse::Frame::Frame (ParmParse& pp, const std::string& pfix) - : - m_pp(&pp) -{ - push(pfix); - BL_ASSERT( m_np == 1 ); + return parser; } -ParmParse::Frame::~Frame () +template +bool pp_parser (const ParmParse::Table& table, const std::string& parser_prefix, + const std::string& name, const std::string& val, T& ref, + bool use_querywithparser) { - BL_ASSERT( m_np > 0 ); - while ( m_np ) - { - pop(); + auto& recursive_symbols = g_parser_recursive_symbols[OpenMP::get_thread_num()]; + if (auto found = recursive_symbols.find(name); found != recursive_symbols.end()) { + amrex::Error("ParmParse: recursive reference to "+name+" is not allowed"); + return false; + } else { + recursive_symbols.insert(name); } - BL_ASSERT( m_np == 0 ); -} -void -ParmParse::Frame::push (const std::string& str) -{ - m_pp->pushPrefix(str); - m_np++; -} + auto parser = pp_make_parser(val, {}, table, parser_prefix, use_querywithparser); + auto exe = parser.template compileHost<0>(); + ref = static_cast(exe()); -void -ParmParse::Frame::pop () -{ - BL_ASSERT( m_np > 0); - m_pp->popPrefix(); - m_np--; + recursive_symbols.erase(name); + return true; } -std::string -ParmParse::Frame::getPrefix () const -{ - return m_pp->getPrefix(); -} +} // End of unnamed namespace. -void -ParmParse::appendTable(ParmParse::Table& tab) +std::string +ParmParse::prefixedName (const std::string_view& str) const { - g_table.splice(g_table.end(), tab); -} + AMREX_ASSERT( ! 
str.empty() ); -namespace { - -bool -unused_table_entries_q (const ParmParse::Table& table, const std::string& prefix = std::string()) -{ - for (auto const& li : table) // NOLINT(readability-use-anyofallof) - { - if ( li.m_table ) - { - if ( !li.m_queried ) - { - if (prefix.empty()) { - return true; - } else { - if (li.m_name.substr(0,prefix.size()+1) == prefix+".") { - return true; - } - } - } - else - { - if (unused_table_entries_q(*li.m_table, prefix)) { return true; } - } - } - else if ( !li.m_queried ) - { - if (prefix.empty()) { - return true; - } else { - if (li.m_name.substr(0,prefix.size()+1) == prefix+".") { - return true; - } - } - } + if (m_prefix.empty()) { + return std::string(str); + } else { + std::string r = m_prefix + '.'; + r.append(str); + return r; } - return false; } void -finalize_table (const std::string& pfx, const ParmParse::Table& table) -{ - for (auto const& li : table) +ParmParse::addfile (std::string const& filename) { +#ifdef AMREX_USE_MPI + if (ParallelDescriptor::Communicator() == MPI_COMM_NULL) { - if ( li.m_table ) - { - if ( !li.m_queried ) - { - if (finalize_verbose) { - amrex::AllPrint() << "Record " << li.m_name << '\n'; - } - } - else - { - finalize_table(pfx + "::" + li.m_name, *li.m_table); - } - } - else if ( !li.m_queried ) - { - if (finalize_verbose) { - amrex::AllPrint() << pfx << "::" << li << '\n'; - } - } + throw std::runtime_error("ParmParse::addfile: AMReX must be initialized"); } -} +#endif + auto file = FileKeyword; + std::vector val{{filename}}; + addDefn(file, val, g_table); } void @@ -1257,6 +1063,8 @@ ParmParse::Initialize (int argc, amrex::Error("ParmParse::Initialize(): already initialized!"); } + g_parser_recursive_symbols.resize(OpenMP::get_max_threads()); + ppinit(argc, argv, parfile, g_table); amrex::ExecOnFinalize(ParmParse::Finalize); @@ -1267,10 +1075,11 @@ ParmParse::QueryUnusedInputs () { if ( ParallelDescriptor::IOProcessor() && unused_table_entries_q(g_table)) { - finalize_verbose = amrex::system::verbose; - if (finalize_verbose) { amrex::OutStream() << "Unused ParmParse Variables:\n"; } - finalize_table(" [TOP]", g_table); - if (finalize_verbose) { amrex::OutStream() << '\n'; } + if (ParmParse::Verbose()) { + amrex::OutStream() << "Unused ParmParse Variables:\n"; + pp_print_unused(" [TOP]", g_table); + amrex::OutStream() << '\n'; + } return true; } return false; @@ -1282,51 +1091,66 @@ ParmParse::hasUnusedInputs (const std::string& prefix) return unused_table_entries_q(g_table, prefix); } -namespace { -void -get_entries_under_prefix (std::vector& found_entries, - const ParmParse::Table& table, - const std::string& prefix, - const bool only_unused = false, - const bool add_values = false) +std::vector +ParmParse::getUnusedInputs (const std::string& prefix) { + std::vector sorted_names; const std::string prefixdot = prefix.empty() ? std::string() : prefix+"."; - for (auto const& entry : table) { - if ((! only_unused) || (only_unused && ! 
entry.m_queried)) { - if (entry.m_name.substr(0,prefixdot.size()) == prefixdot) { - std::string tmp(entry.m_name); - if (add_values) { - tmp.append(" ="); - for (auto const& v : entry.m_vals) { - tmp += " " + v; - } - } - found_entries.emplace_back(std::move(tmp)); - } + for (auto const& [name, entry] : g_table) { + if (entry.m_count == 0 && + name.substr(0,prefixdot.size()) == prefixdot) + { + sorted_names.push_back(name); } + } + std::sort(sorted_names.begin(), sorted_names.end()); - if (entry.m_table) { - get_entries_under_prefix(found_entries, table, prefix, - only_unused, add_values); + std::vector r; + for (auto const& name : sorted_names) { + auto const& entry = g_table[name]; + for (auto const& vals : entry.m_vals) { + std::string tmp(name); + tmp.append(" ="); + for (auto const& v : vals) { + tmp += " " + v; + } + r.emplace_back(std::move(tmp)); } } -} -} -std::vector -ParmParse::getUnusedInputs (const std::string& prefix) -{ - std::vector r; - get_entries_under_prefix(r, g_table, prefix, true, true); return r; } std::set ParmParse::getEntries (const std::string& prefix) { - std::vector r; - get_entries_under_prefix(r, g_table, prefix, false, false); - return std::set(r.begin(), r.end()); + std::set r; + const std::string prefixdot = prefix.empty() ? std::string() : prefix+"."; + for (auto const& [name, entry] : g_table) { + if (name.substr(0,prefixdot.size()) == prefixdot) { + r.insert(name); + } + } + return r; +} + +int +ParmParse::Verbose () +{ + if (pp_detail::verbose < 0) { + pp_detail::verbose = std::max(amrex::Verbose(),0); + ParmParse pp("amrex.parmparse"); + if (! pp.query("verbose", "v", pp_detail::verbose)) { + pp.add("verbose", pp_detail::verbose); + } + } + return pp_detail::verbose; +} + +void +ParmParse::SetVerbose (int v) +{ + pp_detail::verbose = v; } void @@ -1334,13 +1158,11 @@ ParmParse::Finalize () { if ( ParallelDescriptor::IOProcessor() && unused_table_entries_q(g_table)) { - finalize_verbose = amrex::system::verbose; - if (finalize_verbose) { amrex::OutStream() << "Unused ParmParse Variables:\n"; } - finalize_table(" [TOP]", g_table); - if (finalize_verbose) { amrex::OutStream() << '\n'; } - // - // First loop through and delete all queried entries. 
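// Illustrative sketch (not part of this patch): with the reworked verbosity
// control above, the unused-inputs report can be silenced from the inputs file
// (amrex.parmparse.verbose = 0) or from code, and unused entries can still be
// inspected explicitly. The prefix "prob" below is hypothetical.
#include <AMReX_ParmParse.H>
#include <AMReX_Print.H>
void report_unused_prob_inputs_example ()
{
    amrex::ParmParse::SetVerbose(0);   // suppress the automatic report at Finalize
    if (amrex::ParmParse::hasUnusedInputs("prob")) {
        for (auto const& s : amrex::ParmParse::getUnusedInputs("prob")) {
            amrex::Print() << "unused: " << s << '\n';
        }
    }
}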
- // + if (ParmParse::Verbose()) { + amrex::OutStream() << "Unused ParmParse Variables:\n"; + pp_print_unused(" [TOP]", g_table); + amrex::OutStream() << '\n'; + } if (amrex::system::abort_on_unused_inputs) { amrex::Abort("ERROR: unused ParmParse variables."); } @@ -1351,19 +1173,65 @@ ParmParse::Finalize () amrex_finalize_namelist(); #endif + g_parser_recursive_symbols.clear(); + + pp_detail::verbose = -1; initialized = false; } +void +ParmParse::SetParserPrefix (std::string a_prefix) +{ + ParmParse::ParserPrefix = std::move(a_prefix); +} + void ParmParse::dumpTable (std::ostream& os, bool prettyPrint) { - for (auto const& li : g_table) - { - if(prettyPrint && li.m_queried) { - os << li.print() << '\n'; + std::vector sorted_names; + sorted_names.reserve(g_table.size()); + for (auto const& [name, entry] : g_table) { + sorted_names.push_back(name); + } + std::sort(sorted_names.begin(), sorted_names.end()); + + for (auto const& name : sorted_names) { + auto const& entry = g_table[name]; + if (prettyPrint && entry.m_count > 0) { + for (auto const& vals : entry.m_vals) { + os << pp_to_pretty_string(name, vals) << '\n'; + } } else { - os << li << '\n'; + for (auto const& vals : entry.m_vals) { + os << pp_to_string(name, vals) << '\n'; + } + } + } +} + +void +ParmParse::prettyPrintTable (std::ostream& os) +{ + std::vector sorted_names; + sorted_names.reserve(g_table.size()); + for (auto const& [name, entry] : g_table) { + sorted_names.push_back(name); + } + std::sort(sorted_names.begin(), sorted_names.end()); + + for (auto const& name : sorted_names) { + auto const& entry = g_table[name]; + std::vector value_string; + std::unordered_map count; + for (auto const& vals : entry.m_vals) { + value_string.emplace_back(pp_to_pretty_string(name, vals)); + ++count[value_string.back()]; + } + for (auto const& s : value_string) { + if (--count[s] == 0) { + os << s << '\n'; + } } } } @@ -1375,8 +1243,8 @@ ParmParse::countval (const char* name, // // First find n'th occurrence of name in table. // - const PP_entry* def = ppindex(*m_table, n, prefixedName(name), false); - return def == nullptr ? 0 : static_cast(def->m_vals.size()); + auto const* def = ppindex(*m_table, n, prefixedName(name)); + return def == nullptr ? 
0 : static_cast(def->size()); } // BOOL @@ -1386,7 +1254,7 @@ ParmParse::getkth (const char* name, bool& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void @@ -1394,7 +1262,7 @@ ParmParse::get (const char* name, bool& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int @@ -1403,7 +1271,7 @@ ParmParse::querykth (const char* name, bool& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int @@ -1411,7 +1279,7 @@ ParmParse::query (const char* name, bool& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1425,25 +1293,25 @@ ParmParse::add (const char* name, void ParmParse::getkth (const char* name, int k, int& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, int& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, int& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, int& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1456,28 +1324,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1491,25 +1359,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, long& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, long& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, long& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return 
squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, long& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1523,28 +1391,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1557,25 +1425,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, long long& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, long long& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, long long& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, long long& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1588,28 +1456,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1622,25 +1490,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, float& ref, int ival) const { - sgetval(*m_table, 
prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, float& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, float& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, float& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1653,28 +1521,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix, num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix, num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1689,25 +1557,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, double& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, double& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, double& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, double& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1720,28 +1588,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix, num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix, num_val,k); } int 
ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1756,25 +1624,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, std::string& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, std::string& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, std::string& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, std::string& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1787,28 +1655,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix, num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix, num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1823,25 +1691,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, IntVect& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, IntVect& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, IntVect& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, IntVect& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1854,28 +1722,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, 
std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix, num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix, num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1888,25 +1756,25 @@ ParmParse::addarr (const char* name, const std::vector& ref) void ParmParse::getkth (const char* name, int k, Box& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival,k); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } void ParmParse::get (const char* name, Box& ref, int ival) const { - sgetval(*m_table, prefixedName(name),ref,ival, LAST); + sgetval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } int ParmParse::querykth (const char* name, int k, Box& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival,k); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival,k); } int ParmParse::query (const char* name, Box& ref, int ival) const { - return squeryval(*m_table, prefixedName(name),ref,ival, LAST); + return squeryval(*m_table,m_parser_prefix, prefixedName(name),ref,ival, LAST); } void @@ -1919,28 +1787,28 @@ void ParmParse::getktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val,k); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val,k); } void ParmParse::getarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - sgetarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + sgetarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } int ParmParse::queryktharr (const char* name, int k, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix, num_val,k); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix, num_val,k); } int ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, int num_val) const { - return squeryarr(*m_table, prefixedName(name),ref,start_ix,num_val, LAST); + return squeryarr(*m_table,m_parser_prefix, prefixedName(name),ref,start_ix,num_val, LAST); } void @@ -1950,36 +1818,62 @@ ParmParse::addarr (const char* name, const std::vector& ref) } -// -// Return number of occurrences of parameter name. 
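// Illustrative sketch (not part of this patch): because the getters above now
// carry a parser prefix, a numeric entry may be written as an arithmetic
// expression referencing other entries; on a type mismatch the string should be
// handed to the expression parser (as in squeryarr above). Hypothetical inputs:
//
//   geom.n_cell = 128
//   geom.dx     = 1.0/geom.n_cell
//
#include <AMReX_ParmParse.H>
void read_dx_example ()
{
    amrex::ParmParse pp("geom");
    double dx = 0.0;
    pp.query("dx", dx);             // "1.0/geom.n_cell" is evaluated via the parser
    pp.queryWithParser("dx", dx);   // the explicit parser interface added below
}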
-// +int +ParmParse::queryarr (const char* name, IntVect& ref) const +{ + std::vector v; + int exist = this->queryarr(name, v); + if (exist) { + AMREX_ALWAYS_ASSERT(v.size() == AMREX_SPACEDIM); + for (int i = 0; i < AMREX_SPACEDIM; ++i) { ref[i] = v[i]; } + } + return exist; +} + +void +ParmParse::getarr (const char* name, IntVect& ref) const +{ + std::vector v; + this->getarr(name, v); + AMREX_ALWAYS_ASSERT(v.size() == AMREX_SPACEDIM); + for (int i = 0; i < AMREX_SPACEDIM; ++i) { ref[i] = v[i]; } +} int -ParmParse::countname (const std::string& name) const +ParmParse::queryarr (const char* name, RealVect& ref) const { - int cnt = 0; - for (auto const& li : *m_table) - { - if ( ppfound(prefixedName(name), li, false) ) - { - cnt++; - } + std::vector v; + int exist = this->queryarr(name, v); + if (exist) { + AMREX_ALWAYS_ASSERT(v.size() == AMREX_SPACEDIM); + for (int i = 0; i < AMREX_SPACEDIM; ++i) { ref[i] = v[i]; } } - return cnt; + return exist; +} + +void +ParmParse::getarr (const char* name, RealVect& ref) const +{ + std::vector v; + this->getarr(name, v); + AMREX_ALWAYS_ASSERT(v.size() == AMREX_SPACEDIM); + for (int i = 0; i < AMREX_SPACEDIM; ++i) { ref[i] = v[i]; } } +// +// Return number of occurrences of parameter name. +// + int -ParmParse::countRecords (const std::string& name) const +ParmParse::countname (const std::string& name) const { - int cnt = 0; - for (auto const& li : *m_table) - { - if ( ppfound(prefixedName(name), li, true) ) - { - cnt++; - } + auto pname = prefixedName(name); + auto found = m_table->find(pname); + if (found != m_table->cend()) { + return static_cast(found->second.m_vals.size()); + } else { + return 0; } - return cnt; } // @@ -1989,74 +1883,139 @@ ParmParse::countRecords (const std::string& name) const bool ParmParse::contains (const char* name) const { - for (auto const& li : *m_table) - { - if ( ppfound(prefixedName(name), li, false)) - { - // - // Found an entry; mark all occurrences of name as used. 
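// Illustrative sketch (not part of this patch): the IntVect/RealVect overloads
// added above read exactly AMREX_SPACEDIM values and abort otherwise. The
// prefix and entry names are hypothetical.
#include <AMReX_ParmParse.H>
#include <AMReX_IntVect.H>
#include <AMReX_RealVect.H>
void read_vector_inputs_example ()
{
    amrex::ParmParse pp("geom");
    amrex::IntVect  n_cell;            // e.g. geom.n_cell  = 64 64 64
    amrex::RealVect prob_lo;           // e.g. geom.prob_lo = 0. 0. 0.
    pp.getarr("n_cell", n_cell);       // required entry: aborts if absent
    pp.queryarr("prob_lo", prob_lo);   // optional entry: returns 0 if absent
}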
- // - for (auto& lli : *m_table) - { - if ( ppfound(prefixedName(name), lli, false) ) - { - lli.m_queried = true; - } - } - return true; - } + auto pname = prefixedName(name); + auto found = m_table->find(pname); + if (found != m_table->cend()) { +#ifdef AMREX_USE_OMP +#pragma omp atomic update +#endif + ++(found->second.m_count); + return true; + } else { + return false; } - return false; } int ParmParse::remove (const char* name) { - int r = 0; - for (auto it = m_table->begin(); it != m_table->end(); ) { - if (ppfound(prefixedName(name), *it, false)) { - it = m_table->erase(it); - ++r; - } else { - ++it; - } + auto const pname = prefixedName(name); + auto n = m_table->erase(pname); + return static_cast(n); +} + +namespace { +template +bool squeryWithParser (const ParmParse::Table& table, + const std::string& parser_prefix, + const std::string& name, + T& ref) +{ + std::vector vals; + bool exist = squeryarr(table, parser_prefix, name, vals, + ParmParse::FIRST, ParmParse::ALL, ParmParse::LAST); + if (!exist) { return false; } + + std::string combined_string; + for (auto const& v : vals) { + combined_string.append(v); } - return r; + return pp_parser(table, parser_prefix, name, combined_string, ref, true); } -ParmParse::Record -ParmParse::getRecord (const std::string& name, int n) const -{ - const PP_entry* pe = ppindex(*m_table, n, prefixedName(name), true); - if ( pe == nullptr ) - { - amrex::ErrorStream() << "ParmParse::getRecord: record " << name << " not found" << '\n'; - amrex::Abort(); - return Record(ParmParse()); - } else { - return Record(ParmParse(*pe->m_table)); +template +bool squeryarrWithParser (const ParmParse::Table& table, + const std::string& parser_prefix, + const std::string& name, + int nvals, + T* ref) +{ + std::vector vals; + bool exist = squeryarr(table, parser_prefix, name, vals, + ParmParse::FIRST, ParmParse::ALL, ParmParse::LAST); + if (!exist) { return false; } + + AMREX_ALWAYS_ASSERT(int(vals.size()) == nvals); + for (int ival = 0; ival < nvals; ++ival) { + bool r = pp_parser(table, parser_prefix, name, vals[ival], ref[ival], true); + if (!r) { return false; } } + return true; +} } -// -// -// +int +ParmParse::queryWithParser (const char* name, int& ref) const +{ + return squeryWithParser(*m_table,m_parser_prefix,prefixedName(name),ref); +} + +int +ParmParse::queryWithParser (const char* name, long& ref) const +{ + return squeryWithParser(*m_table,m_parser_prefix,prefixedName(name),ref); +} + +int +ParmParse::queryWithParser (const char* name, long long& ref) const +{ + return squeryWithParser(*m_table,m_parser_prefix,prefixedName(name),ref); +} -ParmParse::Record::Record ( const ParmParse& pp ) - : m_pp(pp) +int +ParmParse::queryWithParser (const char* name, float& ref) const +{ + return squeryWithParser(*m_table,m_parser_prefix,prefixedName(name),ref); +} + +int +ParmParse::queryWithParser (const char* name, double& ref) const +{ + return squeryWithParser(*m_table,m_parser_prefix,prefixedName(name),ref); +} + +int +ParmParse::queryarrWithParser (const char* name, int nvals, int* ref) const +{ + return squeryarrWithParser(*m_table,m_parser_prefix,prefixedName(name),nvals,ref); +} + +int +ParmParse::queryarrWithParser (const char* name, int nvals, long* ref) const +{ + return squeryarrWithParser(*m_table,m_parser_prefix,prefixedName(name),nvals,ref); +} + +int +ParmParse::queryarrWithParser (const char* name, int nvals, long long* ref) const +{ + return squeryarrWithParser(*m_table,m_parser_prefix,prefixedName(name),nvals,ref); +} + +int 
+ParmParse::queryarrWithParser (const char* name, int nvals, float* ref) const +{ + return squeryarrWithParser(*m_table,m_parser_prefix,prefixedName(name),nvals,ref); +} + +int +ParmParse::queryarrWithParser (const char* name, int nvals, double* ref) const { + return squeryarrWithParser(*m_table,m_parser_prefix,prefixedName(name),nvals,ref); } -const ParmParse* -ParmParse::Record::operator-> () const +Parser +ParmParse::makeParser (std::string const& func, + Vector const& vars) const { - return &m_pp; + return pp_make_parser(func, vars, *m_table, m_parser_prefix, true); } -const ParmParse& -ParmParse::Record::operator* () const +IParser +ParmParse::makeIParser (std::string const& func, + Vector const& vars) const { - return m_pp; + return pp_make_parser(func, vars, *m_table, m_parser_prefix, true); } } diff --git a/Src/Base/AMReX_Periodicity.H b/Src/Base/AMReX_Periodicity.H index 959e1bb34af..d16e175a4fb 100644 --- a/Src/Base/AMReX_Periodicity.H +++ b/Src/Base/AMReX_Periodicity.H @@ -32,7 +32,7 @@ public: //! Cell-centered domain Box "infinitely" long in non-periodic directions. [[nodiscard]] Box Domain () const noexcept; - [[nodiscard]] std::vector shiftIntVect () const; + [[nodiscard]] std::vector shiftIntVect (IntVect const& nghost = IntVect(0)) const; static const Periodicity& NonPeriodic () noexcept; diff --git a/Src/Base/AMReX_Periodicity.cpp b/Src/Base/AMReX_Periodicity.cpp index 20dfba1c326..797dfb79ab3 100644 --- a/Src/Base/AMReX_Periodicity.cpp +++ b/Src/Base/AMReX_Periodicity.cpp @@ -5,7 +5,7 @@ namespace amrex { std::vector -Periodicity::shiftIntVect () const +Periodicity::shiftIntVect (IntVect const& nghost) const { std::vector r; @@ -15,6 +15,9 @@ Periodicity::shiftIntVect () const for (int i = 0; i < AMREX_SPACEDIM; ++i) { if (isPeriodic(i)) { per[i] = jmp[i] = period[i]; + while (per[i] < nghost[i]) { + per[i] += period[i]; + } } } diff --git a/Src/Base/AMReX_PlotFileDataImpl.cpp b/Src/Base/AMReX_PlotFileDataImpl.cpp index b85c17ad93c..28cc38e27f4 100644 --- a/Src/Base/AMReX_PlotFileDataImpl.cpp +++ b/Src/Base/AMReX_PlotFileDataImpl.cpp @@ -31,7 +31,7 @@ PlotFileDataImpl::PlotFileDataImpl (std::string const& plotfile_name) for (int i = 0; i < m_ncomp; ++i) { std::string tmp; std::getline(is, tmp); - m_var_names[i] = amrex::trim(tmp); + m_var_names[i] = amrex::trim(std::move(tmp)); } is >> m_spacedim >> m_time >> m_finest_level; diff --git a/Src/Base/AMReX_RKIntegrator.H b/Src/Base/AMReX_RKIntegrator.H index f72890c808e..a6efd028532 100644 --- a/Src/Base/AMReX_RKIntegrator.H +++ b/Src/Base/AMReX_RKIntegrator.H @@ -217,9 +217,6 @@ public: BaseT::post_stage_action(S_new, stage_time); } - // Call the update hook for the stage state value - BaseT::pre_rhs_action(S_new, stage_time); - // Fill F[i], the RHS at the current stage // F[i] = RHS(y, t) at y = stage_value, t = stage_time BaseT::Rhs(*F_nodes[i], S_new, stage_time); diff --git a/Src/Base/AMReX_Random.cpp b/Src/Base/AMReX_Random.cpp index a0b72155c50..891a69e140a 100644 --- a/Src/Base/AMReX_Random.cpp +++ b/Src/Base/AMReX_Random.cpp @@ -187,15 +187,14 @@ UniqueRandomSubset (Vector &uSet, int setSize, int poolSize, Abort("**** Error in UniqueRandomSubset: setSize > poolSize."); } std::set copySet; - Vector uSetTemp; + uSet.clear(); while(static_cast(copySet.size()) < setSize) { int r = static_cast(Random_int(poolSize)); if(copySet.find(r) == copySet.end()) { copySet.insert(r); - uSetTemp.push_back(r); + uSet.push_back(r); } } - uSet = uSetTemp; if(printSet) { for(int i(0); i < uSet.size(); ++i) { AllPrint() << "uSet[" << i 
<< "] = " << uSet[i] << '\n'; diff --git a/Src/Base/AMReX_String.H b/Src/Base/AMReX_String.H new file mode 100644 index 00000000000..147b7ab1870 --- /dev/null +++ b/Src/Base/AMReX_String.H @@ -0,0 +1,30 @@ +#ifndef AMREX_STRING_H_ +#define AMREX_STRING_H_ +#include + +#include +#include + +namespace amrex { + + //! Converts all characters of the string into lower case based on std::locale + std::string toLower (std::string s); + + //! Converts all characters of the string into uppercase based on std::locale + std::string toUpper (std::string s); + + //! Trim leading and trailing characters in the optional `space` + //! argument. + std::string trim (std::string s, std::string const& space = " \t"); + + //! Returns rootNNNN where NNNN == num. + std::string Concatenate (const std::string& root, + int num, + int mindigits = 5); + + //! Split a string using given tokens in `sep`. + std::vector split (std::string const& s, + std::string const& sep = " \t"); +} + +#endif diff --git a/Src/Base/AMReX_String.cpp b/Src/Base/AMReX_String.cpp new file mode 100644 index 00000000000..24dbce4532f --- /dev/null +++ b/Src/Base/AMReX_String.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include +#include +#include +#include + +namespace amrex { + +std::string toLower (std::string s) +{ + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::tolower(c); }); + return s; +} + +std::string toUpper (std::string s) +{ + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::toupper(c); }); + return s; +} + +std::string trim(std::string s, std::string const& space) +{ + const auto sbegin = s.find_first_not_of(space); + if (sbegin == std::string::npos) { return std::string{}; } + const auto send = s.find_last_not_of(space); + s = s.substr(sbegin, send-sbegin+1); + return s; +} + +std::string Concatenate (const std::string& root, int num, int mindigits) +{ + BL_ASSERT(mindigits >= 0); + std::stringstream result; + result << root << std::setfill('0') << std::setw(mindigits) << num; + return result.str(); +} + +std::vector split (std::string const& s, std::string const& sep) +{ + std::vector result; + std::size_t pos_begin, pos_end = 0; + while ((pos_begin = s.find_first_not_of(sep,pos_end)) != std::string::npos) { + pos_end = s.find_first_of(sep,pos_begin); + result.push_back(s.substr(pos_begin,pos_end-pos_begin)); + if (pos_end == std::string::npos) { break; } + } + return result; +} + +} diff --git a/Src/Base/AMReX_TimeIntegrator.H b/Src/Base/AMReX_TimeIntegrator.H index 10443361533..316ad0ff318 100644 --- a/Src/Base/AMReX_TimeIntegrator.H +++ b/Src/Base/AMReX_TimeIntegrator.H @@ -65,17 +65,14 @@ private: void set_default_functions () { // By default, do nothing in the RHS - set_rhs([](T& /* S_rhs */, const T& /* S_data */, const amrex::Real /* time */){}); - set_imex_rhs([](T& /* S_rhs */, const T& /* S_data */, const amrex::Real /* time */){}, - [](T& /* S_rhs */, const T& /* S_data */, const amrex::Real /* time */){}); - set_fast_rhs([](T& /* S_rhs */, const T& /* S_data */, const amrex::Real /* time */){}); + set_rhs([](T& /* S_rhs */, T& /* S_data */, const amrex::Real /* time */){}); + set_imex_rhs([](T& /* S_rhs */, T& /* S_data */, const amrex::Real /* time */){}, + [](T& /* S_rhs */, T& /* S_data */, const amrex::Real /* time */){}); + set_fast_rhs([](T& /* S_rhs */, T& /* S_data */, const amrex::Real /* time */){}); // In general, the following functions can be used to fill BCs. 
Which // function to set will depend on the method type and intended use case - // By default, do nothing before calling the RHS - set_pre_rhs_action([](T& /* S_data */, amrex::Real /* time */){}); - // By default, do nothing after a stage or step set_post_stage_action([](T& /* S_data */, const amrex::Real /* time */){}); set_post_step_action([](T& /* S_data */, const amrex::Real /* time */){}); @@ -134,27 +131,22 @@ public: } } - void set_rhs (std::function F) + void set_rhs (std::function F) { integrator_ptr->set_rhs(F); } - void set_imex_rhs (std::function Fi, - std::function Fe) + void set_imex_rhs (std::function Fi, + std::function Fe) { integrator_ptr->set_imex_rhs(Fi, Fe); } - void set_fast_rhs (std::function F) + void set_fast_rhs (std::function F) { integrator_ptr->set_fast_rhs(F); } - void set_pre_rhs_action (std::function A) - { - integrator_ptr->set_pre_rhs_action(A); - } - void set_post_stage_action (std::function A) { integrator_ptr->set_post_stage_action(A); @@ -175,11 +167,6 @@ public: integrator_ptr->set_post_fast_step_action(A); } - void set_post_update (std::function A) - { - integrator_ptr->set_post_update(A); - } - amrex::Real get_time_step () { return integrator_ptr->get_time_step(); diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 4ddbb05ea7e..0228949beb1 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -5,14 +5,6 @@ #include #include -#ifdef AMREX_USE_CUDA -#include -#endif - -#if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) -#include -#endif - #include #include #include @@ -143,15 +135,19 @@ private: static std::deque > ttstack; static std::map > statsmap; static double t_init; - static int device_synchronize_around_region; + static bool device_synchronize_around_region; static int n_print_tabs; static int verbose; static double print_threshold; + static bool enabled; + static bool memprof_enabled; + static std::string output_file; - static void PrintStats (std::map& regstats, double dt_max); + static void PrintStats (std::map& regstats, double dt_max, + std::ostream* os); static void PrintMemStats (std::map& memstats, std::string const& memname, double dt_max, - double t_final); + double t_final, std::ostream* os); }; class TinyProfileRegion diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 1cffe78e75a..db922745784 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -15,6 +15,18 @@ #include #endif +#ifdef AMREX_USE_CUDA +#if __has_include() +# include +#else +# include +#endif +#endif + +#if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) +#include +#endif + #include #include #include @@ -34,10 +46,13 @@ std::vector TinyProfiler::regionstack; std::deque > TinyProfiler::ttstack; std::map > TinyProfiler::statsmap; double TinyProfiler::t_init = std::numeric_limits::max(); -int TinyProfiler::device_synchronize_around_region = 0; +bool TinyProfiler::device_synchronize_around_region = false; int TinyProfiler::n_print_tabs = 0; int TinyProfiler::verbose = 0; double TinyProfiler::print_threshold = 1.; +bool TinyProfiler::enabled = true; +bool TinyProfiler::memprof_enabled = true; +std::string TinyProfiler::output_file; namespace { constexpr char mainregion[] = "main"; @@ -75,6 +90,8 @@ TinyProfiler::~TinyProfiler () void TinyProfiler::start () noexcept { + if (!enabled) { return; } + memory_start(); #ifdef AMREX_USE_OMP @@ -124,6 +141,8 @@ TinyProfiler::start () noexcept for (int itab = 0; itab < n_print_tabs; ++itab) { whitespace 
+= " "; } + // If we try to print to output_file here, it may not be thread + // safe. Also note that this is controlled by verbose already. amrex::Print() << whitespace << "TP: Entering " << fname << '\n'; } } @@ -132,6 +151,8 @@ TinyProfiler::start () noexcept void TinyProfiler::stop () noexcept { + if (!enabled) { return; } + memory_stop(); #ifdef AMREX_USE_OMP @@ -193,13 +214,18 @@ TinyProfiler::stop () noexcept whitespace += " "; } --n_print_tabs; + // If we try to print to output_file here, it may not be thread + // safe. Also note that this is controlled by verbose already. amrex::Print() << whitespace << "TP: Leaving " << fname << '\n'; } } } void -TinyProfiler::memory_start () const noexcept { +TinyProfiler::memory_start () const noexcept +{ + if (!memprof_enabled) { return; } + // multiple omp threads may share the same TinyProfiler object so this function must be const // it is NOT allowed to double start a section #ifdef AMREX_USE_OMP @@ -213,7 +239,10 @@ TinyProfiler::memory_start () const noexcept { } void -TinyProfiler::memory_stop () const noexcept { +TinyProfiler::memory_stop () const noexcept +{ + if (!memprof_enabled) { return; } + // multiple omp threads may share the same TinyProfiler object so this function must be const // it IS allowed to double stop a section #ifdef AMREX_USE_OMP @@ -232,7 +261,10 @@ TinyProfiler::memory_stop () const noexcept { } MemStat* -TinyProfiler::memory_alloc (std::size_t nbytes, std::map& memstats) noexcept { +TinyProfiler::memory_alloc (std::size_t nbytes, std::map& memstats) noexcept +{ + if (!memprof_enabled) { return nullptr; } + // this function is not thread safe for the same memstats // the caller of this function (CArena::alloc) has a mutex MemStat* stat = nullptr; @@ -258,7 +290,10 @@ TinyProfiler::memory_alloc (std::size_t nbytes, std::map& } void -TinyProfiler::memory_free (std::size_t nbytes, MemStat* stat) noexcept { +TinyProfiler::memory_free (std::size_t nbytes, MemStat* stat) noexcept +{ + if (!memprof_enabled) { return; } + // this function is not thread safe for the same stat // the caller of this function (CArena::free) has a mutex if (stat) { @@ -272,22 +307,48 @@ TinyProfiler::memory_free (std::size_t nbytes, MemStat* stat) noexcept { void TinyProfiler::Initialize () noexcept { - regionstack.emplace_back(mainregion); - t_init = amrex::second(); { amrex::ParmParse pp("tiny_profiler"); pp.queryAdd("device_synchronize_around_region", device_synchronize_around_region); - pp.queryAdd("verbose", verbose); - pp.queryAdd("v", verbose); + if (! 
pp.query("verbose", "v", verbose)) { + pp.add("verbose", verbose); + } // Specify the maximum percentage of inclusive time // that the "Other" section in the output can have (default 1%) pp.queryAdd("print_threshold", print_threshold); + + pp.queryAdd("enabled", enabled); + pp.queryAdd("output_file", output_file); } + + if (!enabled) { return; } + + if (ParallelDescriptor::IOProcessor()) { + static bool first = true; + if (first && !output_file.empty() && output_file != "/dev/null") { + if (FileSystem::Exists(output_file)) { + FileSystem::Remove(output_file); + } + first = false; + } + } + + regionstack.emplace_back(mainregion); + t_init = amrex::second(); } void TinyProfiler::MemoryInitialize () noexcept { + { + amrex::ParmParse pp("tiny_profiler"); + pp.queryAdd("enabled", enabled); + pp.queryAdd("memprof_enabled", memprof_enabled); + memprof_enabled = memprof_enabled && enabled; + } + + if (!memprof_enabled) { return; } + #ifdef AMREX_USE_OMP mem_stack_thread_private.resize(omp_get_max_threads()); #endif @@ -296,6 +357,8 @@ TinyProfiler::MemoryInitialize () noexcept void TinyProfiler::Finalize (bool bFlushing) noexcept { + if (!enabled) { return; } + static bool finalized = false; if (!bFlushing) { // If flushing, don't make this the last time! if (finalized) { @@ -321,11 +384,25 @@ TinyProfiler::Finalize (bool bFlushing) noexcept ParallelReduce::Sum(dt_avg, ioproc, ParallelDescriptor::Communicator()); dt_avg /= double(nprocs); - if (ParallelDescriptor::IOProcessor()) + std::ofstream ofs; + std::ostream* os = nullptr; + std::streamsize oldprec = 0; + if (ParallelDescriptor::IOProcessor()) { + if (output_file.empty()) { + os = &(amrex::OutStream()); + } else if (output_file != "/dev/null") { + ofs.open(output_file, std::ios_base::app); + if (!ofs.is_open()) { + amrex::Error("TinyProfiler failed to open "+output_file); + } + os = static_cast(&ofs); + } + } + + if (os) { - amrex::Print() << "\n\n"; - amrex::Print().SetPrecision(4) - <<"TinyProfiler total time across processes [min...avg...max]: " + os->precision(4); + *os << "\n\nTinyProfiler total time across processes [min...avg...max]: " << dt_min << " ... " << dt_avg << " ... 
" << dt_max << "\n"; } @@ -349,19 +426,27 @@ TinyProfiler::Finalize (bool bFlushing) noexcept } } - PrintStats(lstatsmap[mainregion], dt_max); + PrintStats(lstatsmap[mainregion], dt_max, os); for (auto& kv : lstatsmap) { if (kv.first != mainregion) { - amrex::Print() << "\n\nBEGIN REGION " << kv.first << "\n"; - PrintStats(kv.second, dt_max); - amrex::Print() << "END REGION " << kv.first << "\n"; + if (os) { + *os << "\n\nBEGIN REGION " << kv.first << "\n"; + } + PrintStats(kv.second, dt_max, os); + if (os) { + *os << "END REGION " << kv.first << "\n"; + } } } + + if(os) { os->precision(oldprec); } } void TinyProfiler::MemoryFinalize (bool bFlushing) noexcept { + if (!memprof_enabled) { return; } + // This function must be called BEFORE the profiled arenas are deleted static bool finalized = false; @@ -378,20 +463,39 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept int ioproc = ParallelDescriptor::IOProcessorNumber(); ParallelReduce::Max(dt_max, ioproc, ParallelDescriptor::Communicator()); + std::ofstream ofs; + std::ostream* os = nullptr; + std::streamsize oldprec = 0; + if (ParallelDescriptor::IOProcessor()) { + if (output_file.empty()) { + os = &(amrex::OutStream()); + } else if (output_file != "/dev/null") { + ofs.open(output_file, std::ios_base::app); + if (!ofs.is_open()) { + amrex::Error("TinyProfiler failed to open "+output_file); + } + os = static_cast(&ofs); + } + } + for (std::size_t i = 0; i < all_memstats.size(); ++i) { - PrintMemStats(*(all_memstats[i]), all_memnames[i], dt_max, t_final); + PrintMemStats(*(all_memstats[i]), all_memnames[i], dt_max, t_final, os); } if (!bFlushing) { all_memstats.clear(); all_memnames.clear(); } + + if(os) { os->precision(oldprec); } } void TinyProfiler::RegisterArena (const std::string& memory_name, std::map& memstats) noexcept { + if (!memprof_enabled) { return; } + all_memstats.push_back(&memstats); all_memnames.push_back(memory_name); } @@ -399,6 +503,8 @@ TinyProfiler::RegisterArena (const std::string& memory_name, void TinyProfiler::DeregisterArena (std::map& memstats) noexcept { + if (!memprof_enabled) { return; } + for (std::size_t i = 0; i < all_memstats.size();) { if (all_memstats[i] == &memstats) { all_memstats.erase(all_memstats.begin() + i); // NOLINT @@ -410,7 +516,8 @@ TinyProfiler::DeregisterArena (std::map& memstats) noexcep } void -TinyProfiler::PrintStats (std::map& regstats, double dt_max) +TinyProfiler::PrintStats (std::map& regstats, double dt_max, + std::ostream* os) { // make sure the set of profiled functions is the same on all processes { @@ -484,9 +591,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) } } - if (ParallelDescriptor::IOProcessor()) + if (ParallelDescriptor::IOProcessor() && os) { - amrex::OutStream() << std::setfill(' ') << std::setprecision(4); + *os << std::setfill(' ') << std::setprecision(4); int wt = 9; int wnc = (int) std::log10 ((double) maxncalls) + 1; @@ -551,34 +658,34 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) // make sure "Other" is printed at the end of the list allprocstats.push_back(other_procstat); } - amrex::OutStream() << "\n" << hline << "\n"; - amrex::OutStream() << std::left - << std::setw(maxfnamelen) << "Name" - << std::right - << std::setw(wnc+2) << "NCalls" - << std::setw(wt+2) << "Excl. Min" - << std::setw(wt+2) << "Excl. Avg" - << std::setw(wt+2) << "Excl. 
Max" - << std::setw(wp+2) << "Max %" - << "\n" << hline << "\n"; + *os << "\n" << hline << "\n"; + *os << std::left + << std::setw(maxfnamelen) << "Name" + << std::right + << std::setw(wnc+2) << "NCalls" + << std::setw(wt+2) << "Excl. Min" + << std::setw(wt+2) << "Excl. Avg" + << std::setw(wt+2) << "Excl. Max" + << std::setw(wp+2) << "Max %" + << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { if (!allprocstat.do_print) { continue; } - amrex::OutStream() << std::setprecision(4) << std::left - << std::setw(maxfnamelen) << allprocstat.fname - << std::right - << std::setw(wnc+2) << allprocstat.navg - << std::setw(wt+2) << allprocstat.dtexmin - << std::setw(wt+2) << allprocstat.dtexavg - << std::setw(wt+2) << allprocstat.dtexmax - << std::setprecision(2) << std::setw(wp+1) << std::fixed - << allprocstat.dtexmax*(100.0/dt_max) << "%"; - amrex::OutStream().unsetf(std::ios_base::fixed); - amrex::OutStream() << "\n"; - } - amrex::OutStream() << hline << "\n"; + *os << std::setprecision(4) << std::left + << std::setw(maxfnamelen) << allprocstat.fname + << std::right + << std::setw(wnc+2) << allprocstat.navg + << std::setw(wt+2) << allprocstat.dtexmin + << std::setw(wt+2) << allprocstat.dtexavg + << std::setw(wt+2) << allprocstat.dtexmax + << std::setprecision(2) << std::setw(wp+1) << std::fixed + << allprocstat.dtexmax*(100.0/dt_max) << "%"; + os->unsetf(std::ios_base::fixed); + *os << "\n"; + } + *os << hline << "\n"; if (print_other_procstat) { allprocstats.pop_back(); } @@ -589,41 +696,41 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) // make sure "Other" is printed at the end of the list allprocstats.push_back(other_procstat); } - amrex::OutStream() << "\n" << hline << "\n"; - amrex::OutStream() << std::left - << std::setw(maxfnamelen) << "Name" - << std::right - << std::setw(wnc+2) << "NCalls" - << std::setw(wt+2) << "Incl. Min" - << std::setw(wt+2) << "Incl. Avg" - << std::setw(wt+2) << "Incl. Max" - << std::setw(wp+2) << "Max %" - << "\n" << hline << "\n"; + *os << "\n" << hline << "\n"; + *os << std::left + << std::setw(maxfnamelen) << "Name" + << std::right + << std::setw(wnc+2) << "NCalls" + << std::setw(wt+2) << "Incl. Min" + << std::setw(wt+2) << "Incl. Avg" + << std::setw(wt+2) << "Incl. 
Max" + << std::setw(wp+2) << "Max %" + << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { if (!allprocstat.do_print) { continue; } - amrex::OutStream() << std::setprecision(4) << std::left - << std::setw(maxfnamelen) << allprocstat.fname - << std::right - << std::setw(wnc+2) << allprocstat.navg - << std::setw(wt+2) << allprocstat.dtinmin - << std::setw(wt+2) << allprocstat.dtinavg - << std::setw(wt+2) << allprocstat.dtinmax - << std::setprecision(2) << std::setw(wp+1) << std::fixed - << allprocstat.dtinmax*(100.0/dt_max) << "%"; - amrex::OutStream().unsetf(std::ios_base::fixed); - amrex::OutStream() << "\n"; - } - amrex::OutStream() << hline << "\n\n"; + *os << std::setprecision(4) << std::left + << std::setw(maxfnamelen) << allprocstat.fname + << std::right + << std::setw(wnc+2) << allprocstat.navg + << std::setw(wt+2) << allprocstat.dtinmin + << std::setw(wt+2) << allprocstat.dtinavg + << std::setw(wt+2) << allprocstat.dtinmax + << std::setprecision(2) << std::setw(wp+1) << std::fixed + << allprocstat.dtinmax*(100.0/dt_max) << "%"; + os->unsetf(std::ios_base::fixed); + *os << "\n"; + } + *os << hline << "\n\n"; } } void -TinyProfiler::PrintMemStats(std::map& memstats, - std::string const& memname, double dt_max, - double t_final) +TinyProfiler::PrintMemStats (std::map& memstats, + std::string const& memname, double dt_max, + double t_final, std::ostream* os) { // make sure the set of profiled functions is the same on all processes { @@ -767,7 +874,7 @@ TinyProfiler::PrintMemStats(std::map& memstats, maxlen[i] += 2; } - if (allstatsstr.size() == 1) { return; } + if (allstatsstr.size() == 1 || !os) { return; } int lenhline = 0; for (auto i : maxlen) { @@ -775,24 +882,26 @@ TinyProfiler::PrintMemStats(std::map& memstats, } const std::string hline(lenhline, '-'); - amrex::OutStream() << memname << " Usage:\n"; - amrex::OutStream() << hline << "\n"; + *os << memname << " Usage:\n"; + *os << hline << "\n"; for (std::size_t i=0; i(x)) << "\n"; diff --git a/Src/Base/AMReX_Utility.H b/Src/Base/AMReX_Utility.H index 016b8adb0e2..6bec276dbf2 100644 --- a/Src/Base/AMReX_Utility.H +++ b/Src/Base/AMReX_Utility.H @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -44,17 +45,6 @@ namespace amrex const std::vector& Tokenize (const std::string& instr, const std::string& separators); - //! Converts all characters of the string into lower or uppercase based on std::locale - std::string toLower (std::string s); - std::string toUpper (std::string s); - - //! Trim leading and trailing white space - std::string trim (std::string s, std::string const& space = " \t"); - - //! Returns rootNNNN where NNNN == num. - std::string Concatenate (const std::string& root, - int num, - int mindigits = 5); /** * \brief Creates the specified directories. path may be either a full pathname * or a relative pathname. 
It will create all the directories in the diff --git a/Src/Base/AMReX_Utility.cpp b/Src/Base/AMReX_Utility.cpp index 1c79dfba92f..aa3d8a2d165 100644 --- a/Src/Base/AMReX_Utility.cpp +++ b/Src/Base/AMReX_Utility.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -113,44 +112,6 @@ amrex::Tokenize (const std::string& instr, return tokens; } -std::string -amrex::toLower (std::string s) -{ - std::transform(s.begin(), s.end(), s.begin(), - [](unsigned char c) { return std::tolower(c); }); - return s; -} - -std::string -amrex::toUpper (std::string s) -{ - std::transform(s.begin(), s.end(), s.begin(), - [](unsigned char c) { return std::toupper(c); }); - return s; -} - -std::string -amrex::trim(std::string s, std::string const& space) -{ - const auto sbegin = s.find_first_not_of(space); - if (sbegin == std::string::npos) { return std::string{}; } - const auto send = s.find_last_not_of(space); - s = s.substr(sbegin, send-sbegin+1); - return s; -} - -std::string -amrex::Concatenate (const std::string& root, - int num, - int mindigits) -{ - BL_ASSERT(mindigits >= 0); - std::stringstream result; - result << root << std::setfill('0') << std::setw(mindigits) << num; - return result.str(); -} - - bool amrex::UtilCreateDirectory (const std::string& path, mode_t mode, bool verbose) diff --git a/Src/Base/AMReX_VisMF.cpp b/Src/Base/AMReX_VisMF.cpp index 4c8b4b43fb2..2b0ac6716d0 100644 --- a/Src/Base/AMReX_VisMF.cpp +++ b/Src/Base/AMReX_VisMF.cpp @@ -77,24 +77,24 @@ VisMF::Initialize () amrex::ExecOnFinalize(VisMF::Finalize); ParmParse pp("vismf"); - pp.queryAdd("v",verbose); + pp.query("verbose", "v",verbose); int headerVersion(currentVersion); - pp.queryAdd("headerversion", headerVersion); + pp.query("headerversion", headerVersion); if(headerVersion != currentVersion) { currentVersion = static_cast (headerVersion); } - pp.queryAdd("groupsets", groupSets); - pp.queryAdd("setbuf", setBuf); - pp.queryAdd("usesingleread", useSingleRead); - pp.queryAdd("usesinglewrite", useSingleWrite); - pp.queryAdd("checkfilepositions", checkFilePositions); - pp.queryAdd("usepersistentifstreams", usePersistentIFStreams); - pp.queryAdd("usesynchronousreads", useSynchronousReads); - pp.queryAdd("usedynamicsetselection", useDynamicSetSelection); - pp.queryAdd("iobuffersize", ioBufferSize); - pp.queryAdd("allowsparsewrites", allowSparseWrites); + pp.query("groupsets", groupSets); + pp.query("setbuf", setBuf); + pp.query("usesingleread", useSingleRead); + pp.query("usesinglewrite", useSingleWrite); + pp.query("checkfilepositions", checkFilePositions); + pp.query("usepersistentifstreams", usePersistentIFStreams); + pp.query("usesynchronousreads", useSynchronousReads); + pp.query("usedynamicsetselection", useDynamicSetSelection); + pp.query("iobuffersize", ioBufferSize); + pp.query("allowsparsewrites", allowSparseWrites); initialized = true; } diff --git a/Src/Base/AMReX_iMultiFab.cpp b/Src/Base/AMReX_iMultiFab.cpp index d79e1d71444..090f1ca96d6 100644 --- a/Src/Base/AMReX_iMultiFab.cpp +++ b/Src/Base/AMReX_iMultiFab.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index cebd1f9bce1..0436ad032e4 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -12,6 +12,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_Array.H AMReX_BlockMutex.H AMReX_BlockMutex.cpp + AMReX_Enum.H AMReX_GpuComplex.H AMReX_Vector.H AMReX_TableData.H @@ -30,6 +31,8 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_parmparse_fi.cpp 
AMReX_ParmParse.H AMReX_Functional.H + AMReX_String.H + AMReX_String.cpp AMReX_Utility.H AMReX_Utility.cpp AMReX_FileSystem.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index dfbfb4f03a1..b009ebf7d65 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -2,6 +2,7 @@ AMREX_BASE=EXE C$(AMREX_BASE)_headers += AMReX_ccse-mpi.H AMReX_Algorithm.H AMReX_Any.H AMReX_Array.H +C$(AMREX_BASE)_headers += AMReX_Enum.H C$(AMREX_BASE)_headers += AMReX_Vector.H AMReX_TableData.H AMReX_Tuple.H AMReX_Math.H C$(AMREX_BASE)_headers += AMReX_TypeList.H @@ -22,6 +23,9 @@ C$(AMREX_BASE)_sources += AMReX_PODVector.cpp C$(AMREX_BASE)_headers += AMReX_BlockMutex.H C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp +C$(AMREX_BASE)_headers += AMReX_String.H +C$(AMREX_BASE)_sources += AMReX_String.cpp + C$(AMREX_BASE)_sources += AMReX_ParmParse.cpp AMReX_parmparse_fi.cpp AMReX_Utility.cpp C$(AMREX_BASE)_headers += AMReX_ParmParse.H AMReX_Utility.H AMReX_BLassert.H AMReX_ArrayLim.H C$(AMREX_BASE)_headers += AMReX_Functional.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H diff --git a/Src/Base/Parser/AMReX_IParser.H b/Src/Base/Parser/AMReX_IParser.H index 6cf1e5b2b0f..9b3f8af6f34 100644 --- a/Src/Base/Parser/AMReX_IParser.H +++ b/Src/Base/Parser/AMReX_IParser.H @@ -18,7 +18,7 @@ struct IParserExecutor { template = 0> [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - int operator() () const noexcept + long long operator() () const noexcept { AMREX_IF_ON_DEVICE((return iparser_exe_eval(m_device_executor, nullptr);)) AMREX_IF_ON_HOST((return iparser_exe_eval(m_host_executor, nullptr);)) @@ -26,16 +26,17 @@ struct IParserExecutor template [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - std::enable_if_t + std::enable_if_t...>, + long long> operator() (Ts... 
var) const noexcept { - amrex::GpuArray l_var{var...}; + amrex::GpuArray l_var{var...}; AMREX_IF_ON_DEVICE((return iparser_exe_eval(m_device_executor, l_var.data());)) AMREX_IF_ON_HOST((return iparser_exe_eval(m_host_executor, l_var.data());)) } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - int operator() (GpuArray const& var) const noexcept + long long operator() (GpuArray const& var) const noexcept { AMREX_IF_ON_DEVICE((return iparser_exe_eval(m_device_executor, var.data());)) AMREX_IF_ON_HOST((return iparser_exe_eval(m_host_executor, var.data());)) @@ -62,7 +63,7 @@ public: explicit operator bool () const; - void setConstant (std::string const& name, int c); + void setConstant (std::string const& name, long long c); void registerVariables (Vector const& vars); diff --git a/Src/Base/Parser/AMReX_IParser.cpp b/Src/Base/Parser/AMReX_IParser.cpp index 0845d47b842..ecda330f198 100644 --- a/Src/Base/Parser/AMReX_IParser.cpp +++ b/Src/Base/Parser/AMReX_IParser.cpp @@ -54,7 +54,7 @@ IParser::operator bool () const } void -IParser::setConstant (std::string const& name, int c) +IParser::setConstant (std::string const& name, long long c) { if (m_data && m_data->m_iparser) { iparser_setconst(m_data->m_iparser, name.c_str(), c); diff --git a/Src/Base/Parser/AMReX_IParser_Exe.H b/Src/Base/Parser/AMReX_IParser_Exe.H index 67756a93599..e7e41c44d48 100644 --- a/Src/Base/Parser/AMReX_IParser_Exe.H +++ b/Src/Base/Parser/AMReX_IParser_Exe.H @@ -62,7 +62,7 @@ struct alignas(8) IParserExeNull { struct alignas(8) IParserExeNumber { enum iparser_exe_t type = IPARSER_EXE_NUMBER; - int v; + long long v; }; struct alignas(8) IParserExeSymbol { @@ -76,7 +76,7 @@ struct alignas(8) IParserExeADD { struct alignas(8) IParserExeSUB { enum iparser_exe_t type = IPARSER_EXE_SUB; - int sign; + long long sign; }; struct alignas(8) IParserExeMUL { @@ -113,31 +113,31 @@ struct alignas(8) IParserExeF2_B { struct alignas(8) IParserExeADD_VP { enum iparser_exe_t type = IPARSER_EXE_ADD_VP; int i; - int v; + long long v; }; struct alignas(8) IParserExeSUB_VP { enum iparser_exe_t type = IPARSER_EXE_SUB_VP; int i; - int v; + long long v; }; struct alignas(8) IParserExeMUL_VP { enum iparser_exe_t type = IPARSER_EXE_MUL_VP; int i; - int v; + long long v; }; struct alignas(8) IParserExeDIV_VP { enum iparser_exe_t type = IPARSER_EXE_DIV_VP; int i; - int v; + long long v; }; struct alignas(8) IParserExeDIV_PV { enum iparser_exe_t type = IPARSER_EXE_DIV_PV; int i; - int v; + long long v; }; struct alignas(8) IParserExeADD_PP { @@ -171,27 +171,27 @@ struct alignas(8) IParserExeNEG_P { struct alignas(8) IParserExeADD_VN { enum iparser_exe_t type = IPARSER_EXE_ADD_VN; - int v; + long long v; }; struct alignas(8) IParserExeSUB_VN { enum iparser_exe_t type = IPARSER_EXE_SUB_VN; - int v; + long long v; }; struct alignas(8) IParserExeMUL_VN { enum iparser_exe_t type = IPARSER_EXE_MUL_VN; - int v; + long long v; }; struct alignas(8) IParserExeDIV_VN { enum iparser_exe_t type = IPARSER_EXE_DIV_VN; - int v; + long long v; }; struct alignas(8) IParserExeDIV_NV { enum iparser_exe_t type = IPARSER_EXE_DIV_NV; - int v; + long long v; }; struct alignas(8) IParserExeADD_PN { @@ -202,7 +202,7 @@ struct alignas(8) IParserExeADD_PN { struct alignas(8) IParserExeSUB_PN { enum iparser_exe_t type = IPARSER_EXE_SUB_PN; int i; - int sign; + long long sign; }; struct alignas(8) IParserExeMUL_PN { @@ -229,19 +229,19 @@ struct alignas(8) IParserExeJUMP { template struct IParserStack { - int m_data[N]; + long long m_data[N]; int m_size = 0; - constexpr 
void push (int v) { m_data[m_size++] = v; } + constexpr void push (long long v) { m_data[m_size++] = v; } constexpr void pop () { --m_size; } - [[nodiscard]] constexpr int const& top () const { return m_data[m_size-1]; } - [[nodiscard]] constexpr int & top () { return m_data[m_size-1]; } - [[nodiscard]] constexpr int operator[] (int i) const { return m_data[i]; } + [[nodiscard]] constexpr long long const& top () const { return m_data[m_size-1]; } + [[nodiscard]] constexpr long long & top () { return m_data[m_size-1]; } + [[nodiscard]] constexpr long long operator[] (int i) const { return m_data[i]; } }; AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int iparser_exe_eval (const char* p, int const* x) +long long iparser_exe_eval (const char* p, long long const* x) { - if (p == nullptr) { return std::numeric_limits::max(); } + if (p == nullptr) { return std::numeric_limits::max(); } IParserStack pstack; while (*((iparser_exe_t*)p) != IPARSER_EXE_NULL) { @@ -256,14 +256,14 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_SYMBOL: { int i = ((IParserExeSymbol*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.push(d); p += sizeof(IParserExeSymbol); break; } case IPARSER_EXE_ADD: { - int b = pstack.top(); + auto b = pstack.top(); pstack.pop(); pstack.top() += b; p += sizeof(IParserExeADD); @@ -271,7 +271,7 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_SUB: { - int b = pstack.top(); + auto b = pstack.top(); pstack.pop(); pstack.top() = (pstack.top() - b) * (((IParserExeSUB*)p)->sign); p += sizeof(IParserExeSUB); @@ -279,7 +279,7 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_MUL: { - int b = pstack.top(); + auto b = pstack.top(); pstack.pop(); pstack.top() *= b; p += sizeof(IParserExeMUL); @@ -287,7 +287,7 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_DIV_F: { - int v = pstack.top(); + auto v = pstack.top(); pstack.pop(); pstack.top() /= v; p += sizeof(IParserExeDIV_F); @@ -295,7 +295,7 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_DIV_B: { - int v = pstack.top(); + auto v = pstack.top(); pstack.pop(); pstack.top() = v / pstack.top(); p += sizeof(IParserExeDIV_B); @@ -315,7 +315,7 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_F2_F: { - int v = pstack.top(); + auto v = pstack.top(); pstack.pop(); pstack.top() = iparser_call_f2(((IParserExeF2_F*)p)->ftype, pstack.top(), v); p += sizeof(IParserExeF2_F); @@ -323,7 +323,7 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_F2_B: { - int v = pstack.top(); + auto v = pstack.top(); pstack.pop(); pstack.top() = iparser_call_f2(((IParserExeF2_B*)p)->ftype, v, pstack.top()); p += sizeof(IParserExeF2_B); @@ -332,7 +332,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_ADD_VP: { int i = ((IParserExeADD_VP*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.push(((IParserExeADD_VP*)p)->v + d); p += sizeof(IParserExeADD_VP); break; @@ -340,7 +340,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_SUB_VP: { int i = ((IParserExeSUB_VP*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.push(((IParserExeSUB_VP*)p)->v - d); p += sizeof(IParserExeSUB_VP); break; @@ -348,7 +348,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_MUL_VP: { int i = ((IParserExeMUL_VP*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = 
AMREX_IPARSER_GET_DATA(i); pstack.push(((IParserExeMUL_VP*)p)->v * d); p += sizeof(IParserExeMUL_VP); break; @@ -356,7 +356,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_DIV_VP: { int i = ((IParserExeDIV_VP*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.push(((IParserExeDIV_VP*)p)->v / d); p += sizeof(IParserExeDIV_VP); break; @@ -364,7 +364,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_DIV_PV: { int i = ((IParserExeDIV_PV*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.push(d / ((IParserExeDIV_PV*)p)->v); p += sizeof(IParserExeDIV_PV); break; @@ -372,9 +372,9 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_ADD_PP: { int i = ((IParserExeADD_PP*)p)->i1; - int d1 = AMREX_IPARSER_GET_DATA(i); + auto d1 = AMREX_IPARSER_GET_DATA(i); i = ((IParserExeADD_PP*)p)->i2; - int d2 = AMREX_IPARSER_GET_DATA(i); + auto d2 = AMREX_IPARSER_GET_DATA(i); pstack.push(d1+d2); p += sizeof(IParserExeADD_PP); break; @@ -382,9 +382,9 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_SUB_PP: { int i = ((IParserExeSUB_PP*)p)->i1; - int d1 = AMREX_IPARSER_GET_DATA(i); + auto d1 = AMREX_IPARSER_GET_DATA(i); i = ((IParserExeSUB_PP*)p)->i2; - int d2 = AMREX_IPARSER_GET_DATA(i); + auto d2 = AMREX_IPARSER_GET_DATA(i); pstack.push(d1-d2); p += sizeof(IParserExeSUB_PP); break; @@ -392,9 +392,9 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_MUL_PP: { int i = ((IParserExeMUL_PP*)p)->i1; - int d1 = AMREX_IPARSER_GET_DATA(i); + auto d1 = AMREX_IPARSER_GET_DATA(i); i = ((IParserExeMUL_PP*)p)->i2; - int d2 = AMREX_IPARSER_GET_DATA(i); + auto d2 = AMREX_IPARSER_GET_DATA(i); pstack.push(d1*d2); p += sizeof(IParserExeMUL_PP); break; @@ -402,9 +402,9 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_DIV_PP: { int i = ((IParserExeDIV_PP*)p)->i1; - int d1 = AMREX_IPARSER_GET_DATA(i); + auto d1 = AMREX_IPARSER_GET_DATA(i); i = ((IParserExeDIV_PP*)p)->i2; - int d2 = AMREX_IPARSER_GET_DATA(i); + auto d2 = AMREX_IPARSER_GET_DATA(i); pstack.push(d1/d2); p += sizeof(IParserExeDIV_PP); break; @@ -412,7 +412,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_NEG_P: { int i = ((IParserExeNEG_P*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.push(-d); p += sizeof(IParserExeNEG_P); break; @@ -450,7 +450,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_ADD_PN: { int i = ((IParserExeADD_PN*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.top() += d; p += sizeof(IParserExeADD_PN); break; @@ -458,7 +458,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_SUB_PN: { int i = ((IParserExeSUB_PN*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.top() = (d - pstack.top()) * (((IParserExeSUB_PN*)p)->sign); p += sizeof(IParserExeSUB_PN); break; @@ -466,7 +466,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_MUL_PN: { int i = ((IParserExeMUL_PN*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); pstack.top() *= d; p += sizeof(IParserExeMUL_PN); break; @@ -474,7 +474,7 @@ int iparser_exe_eval (const char* p, int const* x) case IPARSER_EXE_DIV_PN: { int i = ((IParserExeDIV_PN*)p)->i; - int d = AMREX_IPARSER_GET_DATA(i); + auto d = AMREX_IPARSER_GET_DATA(i); if (((IParserExeDIV_PN*)p)->reverse) { 
pstack.top() /= d; } else { @@ -485,9 +485,9 @@ int iparser_exe_eval (const char* p, int const* x) } case IPARSER_EXE_IF: { - int cond = pstack.top(); + auto cond = pstack.top(); pstack.pop(); - if (cond == 0.0) { // false branch + if (cond == 0) { // false branch p += ((IParserExeIF*)p)->offset; } p += sizeof(IParserExeIF); diff --git a/Src/Base/Parser/AMReX_IParser_Y.H b/Src/Base/Parser/AMReX_IParser_Y.H index 50eb5de39ec..79b6cd7551b 100644 --- a/Src/Base/Parser/AMReX_IParser_Y.H +++ b/Src/Base/Parser/AMReX_IParser_Y.H @@ -75,7 +75,7 @@ enum iparser_node_t { union iparser_nvp { struct iparser_node* n; - int v; + long long v; int ip; }; @@ -89,7 +89,7 @@ struct iparser_node { struct iparser_number { enum iparser_node_t type; - int value; + long long value; }; struct iparser_symbol { @@ -134,7 +134,7 @@ void iparser_defexpr (struct iparser_node* body); struct iparser_symbol* iparser_makesymbol (char* name); struct iparser_node* iparser_newnode (enum iparser_node_t type, struct iparser_node* l, struct iparser_node* r); -struct iparser_node* iparser_newnumber (int d); +struct iparser_node* iparser_newnumber (long long d); struct iparser_node* iparser_newsymbol (struct iparser_symbol* sym); struct iparser_node* iparser_newf1 (enum iparser_f1_t ftype, struct iparser_node* l); struct iparser_node* iparser_newf2 (enum iparser_f2_t ftype, struct iparser_node* l, @@ -164,7 +164,7 @@ struct amrex_iparser* iparser_dup (struct amrex_iparser* source); struct iparser_node* iparser_ast_dup (struct amrex_iparser* iparser, struct iparser_node* node, int move); void iparser_regvar (struct amrex_iparser* iparser, char const* name, int i); -void iparser_setconst (struct amrex_iparser* iparser, char const* name, int c); +void iparser_setconst (struct amrex_iparser* iparser, char const* name, long long c); void iparser_print (struct amrex_iparser* iparser); std::set iparser_get_symbols (struct amrex_iparser* iparser); int iparser_depth (struct amrex_iparser* iparser); @@ -174,27 +174,27 @@ void iparser_ast_optimize (struct iparser_node* node); std::size_t iparser_ast_size (struct iparser_node* node); void iparser_ast_print (struct iparser_node* node, std::string const& space, AllPrint& printer); void iparser_ast_regvar (struct iparser_node* node, char const* name, int i); -void iparser_ast_setconst (struct iparser_node* node, char const* name, int c); +void iparser_ast_setconst (struct iparser_node* node, char const* name, long long c); void iparser_ast_get_symbols (struct iparser_node* node, std::set& symbols, std::set& local_symbols); int iparser_ast_depth (struct iparser_node* node); /*******************************************************************/ -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int -iparser_call_f1 (enum iparser_f1_t /*type*/, int a) +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE long long +iparser_call_f1 (enum iparser_f1_t /*type*/, long long a) { /// There is only one type for now return std::abs(a); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int -iparser_call_f2 (enum iparser_f2_t type, int a, int b) +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE long long +iparser_call_f2 (enum iparser_f2_t type, long long a, long long b) { switch (type) { case IPARSER_FLRDIV: { - int r = a/b; + long long r = a/b; if (r*b == a || (a < 0 && b < 0) || (a > 0 && b > 0)) { return r; } else { @@ -206,13 +206,13 @@ iparser_call_f2 (enum iparser_f2_t type, int a, int b) if (b < 0) { return 0; } else { - int r = 1; + long long r = 1; while (b != 0) { if (b & 1) { r *= a; } b >>= 1; - a *= a; + if (b > 0) { a *= a; } // 
to avoid overflow } return r; } @@ -243,13 +243,15 @@ iparser_call_f2 (enum iparser_f2_t type, int a, int b) } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int -iparser_call_f3 (enum iparser_f3_t /*type*/, int a, int b, int c) +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE long long +iparser_call_f3 (enum iparser_f3_t /*type*/, long long a, long long b, long long c) { // There is only one type currently return (a != 0) ? b : c; } +long long iparser_atoll (const char* str); + } #endif diff --git a/Src/Base/Parser/AMReX_IParser_Y.cpp b/Src/Base/Parser/AMReX_IParser_Y.cpp index cfb036de96e..00cca1717f0 100644 --- a/Src/Base/Parser/AMReX_IParser_Y.cpp +++ b/Src/Base/Parser/AMReX_IParser_Y.cpp @@ -51,7 +51,7 @@ iparser_newnode (enum iparser_node_t type, struct iparser_node* l, struct iparse } struct iparser_node* -iparser_newnumber (int d) +iparser_newnumber (long long d) { auto *r = (struct iparser_number*) std::malloc(sizeof(struct iparser_number)); r->type = IPARSER_NUMBER; @@ -398,7 +398,7 @@ iparser_ast_optimize (struct iparser_node* node) if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_NUMBER) { - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value + ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; @@ -428,28 +428,28 @@ iparser_ast_optimize (struct iparser_node* node) else if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_ADD_VP) { - int v = ((struct iparser_number*)(node->l))->value + IPARSER_EVAL_R(node); + auto v = ((struct iparser_number*)(node->l))->value + IPARSER_EVAL_R(node); IPARSER_MOVEUP_R(node, v); node->type = IPARSER_ADD_VP; } else if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_SUB_VP) { - int v = ((struct iparser_number*)(node->l))->value + IPARSER_EVAL_R(node); + auto v = ((struct iparser_number*)(node->l))->value + IPARSER_EVAL_R(node); IPARSER_MOVEUP_R(node, v); node->type = IPARSER_SUB_VP; } else if (node->l->type == IPARSER_ADD_VP && node->r->type == IPARSER_NUMBER) { - int v = IPARSER_EVAL_L(node) + ((struct iparser_number*)(node->r))->value; + auto v = IPARSER_EVAL_L(node) + ((struct iparser_number*)(node->r))->value; IPARSER_MOVEUP_L(node, v); node->type = IPARSER_ADD_VP; } else if (node->l->type == IPARSER_SUB_VP && node->r->type == IPARSER_NUMBER) { - int v = IPARSER_EVAL_L(node) + ((struct iparser_number*)(node->r))->value; + auto v = IPARSER_EVAL_L(node) + ((struct iparser_number*)(node->r))->value; IPARSER_MOVEUP_L(node, v); node->type = IPARSER_SUB_VP; } @@ -458,14 +458,14 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->r->l->type == IPARSER_NUMBER) { // #l + (#rl + node_rr) -> (#l + #rl) + node_rr, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value + ((struct iparser_number*)(node->r->l))->value; node->r = node->r->r; ((struct iparser_number*)(node->l))->value = v; } else if (node->r->r->type == IPARSER_NUMBER) { // #l + (node_rl + #rr) -> (#l + #rr) + node_rl, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value + ((struct iparser_number*)(node->r->r))->value; node->r = node->r->l; ((struct iparser_number*)(node->l))->value = v; @@ -476,7 +476,7 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->r->l->type == IPARSER_NUMBER) { // #l + (#rl - node_rr) -> (#l + #rl) - node_rr, type change - int v = ((struct 
iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value + ((struct iparser_number*)(node->r->l))->value; node->r = node->r->r; ((struct iparser_number*)(node->l))->value = v; @@ -484,7 +484,7 @@ iparser_ast_optimize (struct iparser_node* node) } else if (node->r->r->type == IPARSER_NUMBER) { // #l + (node_rl - #rr) -> (#l - #rr) + node_rl, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value - ((struct iparser_number*)(node->r->r))->value; node->r = node->r->l; ((struct iparser_number*)(node->l))->value = v; @@ -495,14 +495,14 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->l->l->type == IPARSER_NUMBER) { // (#ll + node_lr) + #r -> nodel_lr + (#ll + #r), same type - int v = ((struct iparser_number*)(node->l->l))->value + auto v= ((struct iparser_number*)(node->l->l))->value + ((struct iparser_number*)(node->r))->value; node->l = node->l->r; ((struct iparser_number*)(node->r))->value = v; } else if (node->l->r->type == IPARSER_NUMBER) { // (node_ll + #lr) + #r -> node_ll + (#lr + #r), same type - int v = ((struct iparser_number*)(node->l->r))->value + auto v= ((struct iparser_number*)(node->l->r))->value + ((struct iparser_number*)(node->r))->value; node->l = node->l->l; ((struct iparser_number*)(node->r))->value = v; @@ -513,7 +513,7 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->l->l->type == IPARSER_NUMBER) { // (#ll - node_lr) + #r -> (#ll + #r) - node_lr, type change - int v = ((struct iparser_number*)(node->l->l))->value + auto v= ((struct iparser_number*)(node->l->l))->value + ((struct iparser_number*)(node->r))->value; node->r = node->l->r; ((struct iparser_number*)(node->l))->type = IPARSER_NUMBER; @@ -522,7 +522,7 @@ iparser_ast_optimize (struct iparser_node* node) } else if (node->l->r->type == IPARSER_NUMBER) { // (node_ll - #lr) + #r -> node_ll + (#r - #lr), same type - int v = ((struct iparser_number*)(node->r))->value + auto v= ((struct iparser_number*)(node->r))->value - ((struct iparser_number*)(node->l->r))->value; node->l = node->l->l; ((struct iparser_number*)(node->r))->value = v; @@ -536,7 +536,7 @@ iparser_ast_optimize (struct iparser_node* node) if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_NUMBER) { - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value - ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; @@ -566,28 +566,28 @@ iparser_ast_optimize (struct iparser_node* node) else if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_ADD_VP) { - int v = ((struct iparser_number*)(node->l))->value - IPARSER_EVAL_R(node); + auto v= ((struct iparser_number*)(node->l))->value - IPARSER_EVAL_R(node); IPARSER_MOVEUP_R(node, v); node->type = IPARSER_SUB_VP; } else if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_SUB_VP) { - int v = ((struct iparser_number*)(node->l))->value - IPARSER_EVAL_R(node); + auto v= ((struct iparser_number*)(node->l))->value - IPARSER_EVAL_R(node); IPARSER_MOVEUP_R(node, v); node->type = IPARSER_ADD_VP; } else if (node->l->type == IPARSER_ADD_VP && node->r->type == IPARSER_NUMBER) { - int v = IPARSER_EVAL_L(node) - ((struct iparser_number*)(node->r))->value; + auto v= IPARSER_EVAL_L(node) - ((struct iparser_number*)(node->r))->value; IPARSER_MOVEUP_L(node, v); node->type = IPARSER_ADD_VP; } else if (node->l->type == IPARSER_SUB_VP && 
node->r->type == IPARSER_NUMBER) { - int v = IPARSER_EVAL_L(node) - ((struct iparser_number*)(node->r))->value; + auto v= IPARSER_EVAL_L(node) - ((struct iparser_number*)(node->r))->value; IPARSER_MOVEUP_L(node, v); node->type = IPARSER_SUB_VP; } @@ -596,14 +596,14 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->r->l->type == IPARSER_NUMBER) { // #l - (#rl + node_rr) -> (#l - #rl) - node_rr, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value - ((struct iparser_number*)(node->r->l))->value; node->r = node->r->r; ((struct iparser_number*)(node->l))->value = v; } else if (node->r->r->type == IPARSER_NUMBER) { // #l - (node_rl + #rr) -> (#l - #rr) - node_rl, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value - ((struct iparser_number*)(node->r->r))->value; node->r = node->r->l; ((struct iparser_number*)(node->l))->value = v; @@ -614,7 +614,7 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->r->l->type == IPARSER_NUMBER) { // #l - (#rl - node_rr) -> (#l - #rl) + node_rr, type change - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value - ((struct iparser_number*)(node->r->l))->value; node->r = node->r->r; ((struct iparser_number*)(node->l))->value = v; @@ -622,7 +622,7 @@ iparser_ast_optimize (struct iparser_node* node) } else if (node->r->r->type == IPARSER_NUMBER) { // #l - (node_rl - #rr) -> (#l + #rr) - node_rl, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value + ((struct iparser_number*)(node->r->r))->value; node->r = node->r->l; ((struct iparser_number*)(node->l))->value = v; @@ -633,14 +633,14 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->l->l->type == IPARSER_NUMBER) { // (#ll + node_lr) - #r -> node_lr - (#r - #ll), same type - int v = ((struct iparser_number*)(node->r))->value + auto v= ((struct iparser_number*)(node->r))->value - ((struct iparser_number*)(node->l->l))->value; node->l = node->l->r; ((struct iparser_number*)(node->r))->value = v; } else if (node->l->r->type == IPARSER_NUMBER) { // (node_ll + #lr) - #r -> node_ll - (#r - #lr), same type - int v = ((struct iparser_number*)(node->r))->value + auto v= ((struct iparser_number*)(node->r))->value - ((struct iparser_number*)(node->l->r))->value; node->l = node->l->l; ((struct iparser_number*)(node->r))->value = v; @@ -651,7 +651,7 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->l->l->type == IPARSER_NUMBER) { // (#ll - node_lr) - #r -> (#ll - #r) - node_lr, type change - int v = ((struct iparser_number*)(node->l->l))->value + auto v= ((struct iparser_number*)(node->l->l))->value - ((struct iparser_number*)(node->r))->value; node->r = node->l->r; node->l->type = IPARSER_NUMBER; @@ -659,7 +659,7 @@ iparser_ast_optimize (struct iparser_node* node) } else if (node->l->r->type == IPARSER_NUMBER) { // (node_ll - #lr) - #r -> node_ll - (#r + #lr), same type - int v = ((struct iparser_number*)(node->r))->value + auto v= ((struct iparser_number*)(node->r))->value + ((struct iparser_number*)(node->l->r))->value; node->l = node->l->l; ((struct iparser_number*)(node->r))->value = v; @@ -673,7 +673,7 @@ iparser_ast_optimize (struct iparser_node* node) if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_NUMBER) { - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value * 
((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; @@ -703,14 +703,14 @@ iparser_ast_optimize (struct iparser_node* node) else if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_MUL_VP) { - int v = ((struct iparser_number*)(node->l))->value * IPARSER_EVAL_R(node); + auto v= ((struct iparser_number*)(node->l))->value * IPARSER_EVAL_R(node); IPARSER_MOVEUP_R(node, v); node->type = IPARSER_MUL_VP; } else if (node->l->type == IPARSER_MUL_VP && node->r->type == IPARSER_NUMBER) { - int v = IPARSER_EVAL_L(node) * ((struct iparser_number*)(node->r))->value; + auto v= IPARSER_EVAL_L(node) * ((struct iparser_number*)(node->r))->value; IPARSER_MOVEUP_L(node, v); node->type = IPARSER_MUL_VP; } @@ -719,14 +719,14 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->r->l->type == IPARSER_NUMBER) { // #l * (#rl * node_rr) -> (#l * #rl) * node_rr, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value * ((struct iparser_number*)(node->r->l))->value; node->r = node->r->r; ((struct iparser_number*)(node->l))->value = v; } else if (node->r->r->type == IPARSER_NUMBER) { // #l * (node_rl * #rr) -> (#l * #rr) * node_rl, same type - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value * ((struct iparser_number*)(node->r->r))->value; node->r = node->r->l; ((struct iparser_number*)(node->l))->value = v; @@ -737,14 +737,14 @@ iparser_ast_optimize (struct iparser_node* node) { if (node->l->l->type == IPARSER_NUMBER) { // (#ll * node_lr) * #r -> nodel_lr * (#ll * #r), same type - int v = ((struct iparser_number*)(node->l->l))->value + auto v= ((struct iparser_number*)(node->l->l))->value * ((struct iparser_number*)(node->r))->value; node->l = node->l->r; ((struct iparser_number*)(node->r))->value = v; } else if (node->l->r->type == IPARSER_NUMBER) { // (node_ll * #lr) * #r -> node_ll + (#lr * #r), same type - int v = ((struct iparser_number*)(node->l->r))->value + auto v= ((struct iparser_number*)(node->l->r))->value * ((struct iparser_number*)(node->r))->value; node->l = node->l->l; ((struct iparser_number*)(node->r))->value = v; @@ -758,7 +758,7 @@ iparser_ast_optimize (struct iparser_node* node) if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_NUMBER) { - int v = ((struct iparser_number*)(node->l))->value + auto v= ((struct iparser_number*)(node->l))->value / ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; @@ -790,7 +790,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->l); if (node->l->type == IPARSER_NUMBER) { - int v = -((struct iparser_number*)(node->l))->value; + auto v= -((struct iparser_number*)(node->l))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -876,7 +876,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->l); if (node->l->type == IPARSER_NUMBER) { - int v = iparser_call_f1 + auto v= iparser_call_f1 (((struct iparser_f1*)node)->ftype, ((struct iparser_number*)(((struct iparser_f1*)node)->l))->value); ((struct iparser_number*)node)->type = IPARSER_NUMBER; @@ -889,7 +889,7 @@ iparser_ast_optimize (struct iparser_node* node) if (node->l->type == IPARSER_NUMBER && node->r->type == IPARSER_NUMBER) { - int v = iparser_call_f2 + auto v= iparser_call_f2 
(((struct iparser_f2*)node)->ftype, ((struct iparser_number*)(((struct iparser_f2*)node)->l))->value, ((struct iparser_number*)(((struct iparser_f2*)node)->r))->value); @@ -905,7 +905,7 @@ iparser_ast_optimize (struct iparser_node* node) ((struct iparser_f3*)node)->n2->type == IPARSER_NUMBER && ((struct iparser_f3*)node)->n3->type == IPARSER_NUMBER) { - int v = iparser_call_f3 + auto v= iparser_call_f3 (((struct iparser_f3*)node)->ftype, ((struct iparser_number*)(((struct iparser_f3*)node)->n1))->value, ((struct iparser_number*)(((struct iparser_f3*)node)->n2))->value, @@ -918,7 +918,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->r); if (node->r->type == IPARSER_NUMBER) { - int v = node->lvp.v + ((struct iparser_number*)(node->r))->value; + auto v= node->lvp.v + ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -927,7 +927,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->r); if (node->r->type == IPARSER_NUMBER) { - int v = node->lvp.v - ((struct iparser_number*)(node->r))->value; + auto v= node->lvp.v - ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -936,7 +936,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->r); if (node->r->type == IPARSER_NUMBER) { - int v = node->lvp.v * ((struct iparser_number*)(node->r))->value; + auto v= node->lvp.v * ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -945,7 +945,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->r); if (node->r->type == IPARSER_NUMBER) { - int v = node->lvp.v / ((struct iparser_number*)(node->r))->value; + auto v= node->lvp.v / ((struct iparser_number*)(node->r))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -954,7 +954,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->r); if (node->r->type == IPARSER_NUMBER) { - int v = ((struct iparser_number*)(node->r))->value / node->lvp.v; + auto v= ((struct iparser_number*)(node->r))->value / node->lvp.v; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -963,7 +963,7 @@ iparser_ast_optimize (struct iparser_node* node) iparser_ast_optimize(node->l); if (node->l->type == IPARSER_NUMBER) { - int v = -((struct iparser_number*)(node->l))->value; + auto v= -((struct iparser_number*)(node->l))->value; ((struct iparser_number*)node)->type = IPARSER_NUMBER; ((struct iparser_number*)node)->value = v; } @@ -1280,7 +1280,7 @@ iparser_ast_regvar (struct iparser_node* node, char const* name, int i) } } -void iparser_ast_setconst (struct iparser_node* node, char const* name, int c) +void iparser_ast_setconst (struct iparser_node* node, char const* name, long long c) { switch (node->type) { @@ -1396,7 +1396,7 @@ iparser_regvar (struct amrex_iparser* iparser, char const* name, int i) } void -iparser_setconst (struct amrex_iparser* iparser, char const* name, int c) +iparser_setconst (struct amrex_iparser* iparser, char const* name, long long c) { iparser_ast_setconst(iparser->ast, name, c); iparser_ast_optimize(iparser->ast); @@ -1427,4 +1427,39 @@ iparser_depth (struct amrex_iparser* iparser) return iparser_ast_depth(iparser->ast); } +long long 
+iparser_atoll (const char* str) +{ + std::string s(str); + s.erase(std::remove(s.begin(), s.end(), '\''), s.end()); + + auto pos_E = s.find('E'); + if (pos_E != std::string::npos) { + s[pos_E] = 'e'; + } + auto pos_e = s.find('e'); + if (pos_e != std::string::npos) { + std::string part_1 = s.substr(0, pos_e); + int ex = std::atoi(s.c_str()+pos_e+1); + auto pos_dot = part_1.find('.'); + if (pos_dot != std::string::npos) { + // iparser'number does not have more than one dot. + ex -= static_cast(part_1.size()-(pos_dot+1)); + part_1.erase(pos_dot,1); + } + if (ex < 0) { + throw std::runtime_error(std::string(str) + " is not an integer"); + } + part_1.resize(part_1.size()+ex,'0'); + return std::atoll(part_1.c_str()); + } else { + auto pos_dot = s.find('.'); + if (pos_dot != std::string::npos && pos_dot+1 < s.size()) { + throw std::runtime_error(std::string(str) + " is not an integer"); + } + // Note that atoll works as expected for numbers ending with `.` like `123.`. + return std::atoll(s.c_str()); + } +} + } diff --git a/Src/Base/Parser/amrex_iparser.l b/Src/Base/Parser/amrex_iparser.l index 83c29a0ec62..15f19b90525 100644 --- a/Src/Base/Parser/amrex_iparser.l +++ b/Src/Base/Parser/amrex_iparser.l @@ -15,6 +15,9 @@ /* Tokens NUMBER, SYMBOL, F1, POW, F2, etc. are defined in amrex_iparser.y. */ /* Types IPARSER_ABS, IPARSER_MIN etc. are defined in AMReX_IParser_y.H. */ + /* Used later to define NUMBER */ +EXP ([Ee][0-9]+) + %% "+" | @@ -48,10 +51,12 @@ "if" { amrex_iparserlval.f3 = amrex::IPARSER_IF; return F3; } /* We use SYMBOL to hold variables and constants */ -[a-zA-Z_][a-zA-Z0-9_]* { amrex_iparserlval.s = amrex::iparser_makesymbol(amrex_iparsertext); return SYMBOL; } +[a-zA-Z_][a-zA-Z0-9_\.]* { amrex_iparserlval.s = amrex::iparser_makesymbol(amrex_iparsertext); return SYMBOL; } /* Number */ -[0-9]+ { amrex_iparserlval.d = std::atoi(amrex_iparsertext); return NUMBER; } +[0-9][0-9']* | +[0-9]+"."[0-9]*{EXP}? | +"."?[0-9]+{EXP}? { amrex_iparserlval.d = amrex::iparser_atoll(amrex_iparsertext); return NUMBER; } /* Special characters */ [ \t] /* ignore white space */ diff --git a/Src/Base/Parser/amrex_iparser.lex.nolint.H b/Src/Base/Parser/amrex_iparser.lex.nolint.H index 0d062f79b64..411c4210b8a 100644 --- a/Src/Base/Parser/amrex_iparser.lex.nolint.H +++ b/Src/Base/Parser/amrex_iparser.lex.nolint.H @@ -611,8 +611,8 @@ static void yynoreturn yy_fatal_error ( const char* msg ); (yy_hold_char) = *yy_cp; \ *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; -#define YY_NUM_RULES 32 -#define YY_END_OF_BUFFER 33 +#define YY_NUM_RULES 34 +#define YY_END_OF_BUFFER 35 /* This struct is not used in this scanner, but its presence is necessary. 
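With iparser_atoll and the new lexer rules above, integer expressions can use digit group separators (1'000'000) and non-negative exponents (2e9), and values are carried as long long instead of int. A hedged sketch of the user-facing effect, assuming IParser keeps its registerVariables/compile<N>() executor interface and that AMReX has been initialized:

    #include <AMReX_IParser.H>

    void iparser_example ()
    {
        amrex::IParser ip("2e9 + n*1'000'000");  // both literals now lex as integers via iparser_atoll
        ip.registerVariables({"n"});
        auto exe = ip.compile<1>();
        long long v = exe(3);                    // 2'000'000'000 + 3'000'000
        (void) v;                                // silence unused-variable warning
    }
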
*/ struct yy_trans_info @@ -620,13 +620,14 @@ struct yy_trans_info flex_int32_t yy_verify; flex_int32_t yy_nxt; }; -static const flex_int16_t yy_accept[50] = +static const flex_int16_t yy_accept[59] = { 0, - 0, 0, 33, 31, 28, 30, 31, 10, 11, 3, - 1, 6, 2, 4, 27, 9, 7, 5, 8, 26, - 31, 16, 26, 26, 26, 26, 20, 15, 14, 27, - 18, 19, 17, 26, 29, 26, 26, 26, 25, 26, - 26, 22, 12, 21, 26, 24, 23, 13, 0 + 0, 0, 35, 33, 30, 32, 33, 10, 11, 3, + 1, 6, 2, 33, 4, 27, 9, 7, 5, 8, + 26, 33, 16, 26, 26, 26, 26, 20, 15, 29, + 14, 27, 28, 27, 0, 18, 19, 17, 26, 31, + 26, 26, 26, 25, 26, 26, 22, 28, 0, 29, + 12, 21, 26, 24, 23, 28, 13, 0 } ; static const YY_CHAR yy_ec[256] = @@ -634,17 +635,17 @@ static const YY_CHAR yy_ec[256] = 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 4, 1, 1, 1, 1, 1, 1, 5, - 6, 7, 8, 9, 10, 1, 11, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 1, 13, 14, - 15, 16, 1, 1, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 1, 18, 1, 19, 17, 1, 20, 21, 17, 22, - - 17, 23, 17, 17, 24, 17, 17, 17, 25, 26, - 27, 17, 17, 28, 29, 17, 17, 17, 17, 30, - 17, 17, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 4, 1, 1, 1, 1, 1, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 1, 15, 16, + 17, 18, 1, 1, 19, 19, 19, 19, 20, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 1, 21, 1, 22, 19, 1, 23, 24, 19, 25, + + 20, 26, 19, 19, 27, 19, 19, 19, 28, 29, + 30, 19, 19, 31, 32, 19, 19, 19, 19, 33, + 19, 19, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -661,55 +662,64 @@ static const YY_CHAR yy_ec[256] = 1, 1, 1, 1, 1 } ; -static const YY_CHAR yy_meta[31] = +static const YY_CHAR yy_meta[34] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + 1, 2, 1, 2, 1, 1, 1, 1, 2, 2, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2 } ; -static const flex_int16_t yy_base[51] = +static const flex_int16_t yy_base[60] = { 0, - 0, 0, 54, 55, 55, 55, 38, 55, 55, 45, - 55, 55, 55, 40, 38, 55, 34, 33, 32, 0, - 43, 55, 10, 12, 13, 17, 55, 55, 55, 32, - 55, 55, 55, 0, 55, 14, 20, 20, 0, 10, - 13, 0, 0, 0, 9, 0, 0, 0, 55, 32 + 0, 0, 76, 77, 77, 77, 58, 77, 77, 66, + 77, 77, 77, 59, 59, 29, 77, 54, 53, 52, + 0, 65, 77, 11, 13, 15, 36, 77, 77, 30, + 77, 32, 31, 42, 52, 77, 77, 77, 0, 77, + 33, 39, 39, 0, 28, 31, 0, 38, 45, 43, + 0, 0, 23, 0, 0, 39, 0, 77, 46 } ; -static const flex_int16_t yy_def[51] = +static const flex_int16_t yy_def[60] = { 0, - 49, 1, 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 50, - 49, 49, 50, 50, 50, 50, 49, 49, 49, 49, - 49, 49, 49, 50, 49, 50, 50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 50, 50, 0, 49 + 58, 1, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 59, 58, 58, 59, 59, 59, 59, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 59, 58, + 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, + 59, 59, 59, 59, 59, 58, 59, 0, 58 } ; -static const flex_int16_t yy_nxt[86] = +static const flex_int16_t yy_nxt[111] = { 0, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 20, 20, 20, 24, 25, 20, 26, 20, 20, 20, - 36, 38, 40, 34, 39, 37, 41, 48, 47, 46, - 45, 44, 43, 30, 42, 35, 33, 32, 31, 30, - 29, 28, 27, 49, 3, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49 + 4, 5, 6, 7, 4, 8, 9, 10, 11, 12, + 13, 
14, 15, 16, 17, 18, 19, 20, 21, 21, + 22, 23, 24, 21, 21, 21, 25, 26, 21, 27, + 21, 21, 21, 32, 41, 43, 32, 45, 44, 42, + 33, 46, 34, 30, 48, 32, 32, 39, 35, 35, + 49, 48, 56, 33, 57, 34, 50, 49, 56, 55, + 54, 35, 53, 52, 51, 50, 47, 40, 38, 37, + 36, 31, 30, 29, 28, 58, 3, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58 } ; -static const flex_int16_t yy_chk[86] = +static const flex_int16_t yy_chk[111] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 23, 24, 25, 50, 24, 23, 25, 45, 41, 40, - 38, 37, 36, 30, 26, 21, 19, 18, 17, 15, - 14, 10, 7, 3, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49 + 1, 1, 1, 16, 24, 25, 32, 26, 25, 24, + 16, 26, 16, 30, 33, 32, 34, 59, 16, 30, + 33, 48, 56, 34, 53, 34, 50, 48, 49, 46, + 45, 34, 43, 42, 41, 35, 27, 22, 20, 19, + 18, 15, 14, 10, 7, 3, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58 } ; static yy_state_type yy_last_accepting_state; @@ -734,6 +744,7 @@ char *yytext; #define YY_NO_INPUT 1 /* Tokens NUMBER, SYMBOL, F1, POW, F2, etc. are defined in amrex_iparser.y. */ /* Types IPARSER_ABS, IPARSER_MIN etc. are defined in AMReX_IParser_y.H. */ + /* Used later to define NUMBER */ #define INITIAL 0 @@ -974,13 +985,13 @@ yy_match: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 50 ) + if ( yy_current_state >= 59 ) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; ++yy_cp; } - while ( yy_current_state != 49 ); + while ( yy_current_state != 58 ); yy_cp = (yy_last_accepting_cpos); yy_current_state = (yy_last_accepting_state); @@ -1079,30 +1090,32 @@ YY_RULE_SETUP YY_BREAK /* Number */ case 27: +case 28: +case 29: YY_RULE_SETUP -{ amrex_iparserlval.d = std::atoi(amrex_iparsertext); return NUMBER; } +{ amrex_iparserlval.d = amrex::iparser_atoll(amrex_iparsertext); return NUMBER; } YY_BREAK /* Special characters */ -case 28: +case 30: YY_RULE_SETUP /* ignore white space */ YY_BREAK -case 29: -/* rule 29 can match eol */ +case 31: +/* rule 31 can match eol */ YY_RULE_SETUP /* ignore line continuation */ YY_BREAK -case 30: -/* rule 30 can match eol */ +case 32: +/* rule 32 can match eol */ YY_RULE_SETUP { return EOL; } YY_BREAK /* everything else */ -case 31: +case 33: YY_RULE_SETUP { amrex_iparsererror("Unknown character %c", *amrex_iparsertext); } YY_BREAK -case 32: +case 34: YY_RULE_SETUP YY_FATAL_ERROR( "flex scanner jammed" ); YY_BREAK @@ -1403,7 +1416,7 @@ static int yy_get_next_buffer (void) while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 50 ) + if ( yy_current_state >= 59 ) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; @@ -1431,11 +1444,11 @@ static int yy_get_next_buffer (void) while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 50 ) + if ( yy_current_state >= 59 ) yy_c = yy_meta[yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; - yy_is_jam = (yy_current_state == 49); + yy_is_jam = (yy_current_state == 58); return yy_is_jam ? 
0 : yy_current_state; } diff --git a/Src/Base/Parser/amrex_iparser.tab.h b/Src/Base/Parser/amrex_iparser.tab.h index 58d607b3415..a72ba4aeed2 100644 --- a/Src/Base/Parser/amrex_iparser.tab.h +++ b/Src/Base/Parser/amrex_iparser.tab.h @@ -89,7 +89,7 @@ union AMREX_IPARSERSTYPE { struct amrex::iparser_node* n; - int d; + long long d; struct amrex::iparser_symbol* s; enum amrex::iparser_f1_t f1; enum amrex::iparser_f2_t f2; diff --git a/Src/Base/Parser/amrex_iparser.y b/Src/Base/Parser/amrex_iparser.y index a079c55698b..2cfd83d07af 100644 --- a/Src/Base/Parser/amrex_iparser.y +++ b/Src/Base/Parser/amrex_iparser.y @@ -26,7 +26,7 @@ int amrex_iparserlex (void); */ %union { struct amrex::iparser_node* n; - int d; + long long d; struct amrex::iparser_symbol* s; enum amrex::iparser_f1_t f1; enum amrex::iparser_f2_t f2; diff --git a/Src/Base/Parser/amrex_parser.l b/Src/Base/Parser/amrex_parser.l index 1854376ffaf..e619bd077f2 100644 --- a/Src/Base/Parser/amrex_parser.l +++ b/Src/Base/Parser/amrex_parser.l @@ -76,7 +76,7 @@ EXP ([Ee][-+]?[0-9]+) "if" { amrex_parserlval.f3 = amrex::PARSER_IF; return F3; } /* We use SYMBOL to hold variables and constants */ -[a-zA-Z_][a-zA-Z0-9_]* { amrex_parserlval.s = amrex::parser_makesymbol(amrex_parsertext); return SYMBOL; } +[a-zA-Z_][a-zA-Z0-9_\.]* { amrex_parserlval.s = amrex::parser_makesymbol(amrex_parsertext); return SYMBOL; } /* Number */ [0-9]+"."[0-9]*{EXP}? | diff --git a/Src/Base/Parser/amrex_parser.lex.nolint.H b/Src/Base/Parser/amrex_parser.lex.nolint.H index cc4021e53c7..a3cb9ad8830 100644 --- a/Src/Base/Parser/amrex_parser.lex.nolint.H +++ b/Src/Base/Parser/amrex_parser.lex.nolint.H @@ -674,7 +674,7 @@ static const YY_CHAR yy_ec[256] = static const YY_CHAR yy_meta[48] = { 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, + 3, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 diff --git a/Src/Boundary/Make.package b/Src/Boundary/Make.package index 7dae7ec913f..009591b2b47 100644 --- a/Src/Boundary/Make.package +++ b/Src/Boundary/Make.package @@ -1,3 +1,5 @@ +ifndef AMREX_BOUNDARY_MAKE + AMREX_BOUNDARY_MAKE := 1 CEXE_sources += AMReX_Mask.cpp AMReX_MultiMask.cpp AMReX_LO_BCTYPES.cpp @@ -27,3 +29,4 @@ endif VPATH_LOCATIONS += $(AMREX_HOME)/Src/Boundary INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Boundary +endif diff --git a/Src/CMakeLists.txt b/Src/CMakeLists.txt index 56cd426efb9..6e8af043e0d 100644 --- a/Src/CMakeLists.txt +++ b/Src/CMakeLists.txt @@ -43,7 +43,6 @@ include(AMReXParallelBackends) # Add definitions # include(AMReXSetDefines) - # # Find and link third party libraries if needed # @@ -188,7 +187,7 @@ if (AMReX_SENSEI) add_subdirectory(Extern/SENSEI) endif () -if (AMReX_CONDUIT OR AMReX_ASCENT) +if (AMReX_CONDUIT OR AMReX_ASCENT OR AMReX_CATALYST) add_subdirectory(Extern/Conduit) endif () diff --git a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index 87ecb9c4392..f99eb504d2f 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -214,11 +214,11 @@ Build (const Geometry& geom, int required_coarsening_level, pp.queryAdd("stl_scale", stl_scale); std::vector stl_center{0.0_rt, 0.0_rt, 0.0_rt}; pp.queryAdd("stl_center", stl_center); - int stl_reverse_normal = 0; + bool stl_reverse_normal = false; pp.queryAdd("stl_reverse_normal", stl_reverse_normal); IndexSpace::push(new IndexSpaceSTL(stl_file, stl_scale, // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) {stl_center[0], stl_center[1], stl_center[2]}, - stl_reverse_normal, + int(stl_reverse_normal), geom, 
required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, diff --git a/Src/EB/AMReX_EB2_GeometryShop.H b/Src/EB/AMReX_EB2_GeometryShop.H index 1be28d536c4..12789310057 100644 --- a/Src/EB/AMReX_EB2_GeometryShop.H +++ b/Src/EB/AMReX_EB2_GeometryShop.H @@ -241,7 +241,7 @@ public: { const auto& problo = geom.ProbLoArray(); const auto& dx = geom.CellSizeArray(); - auto f = m_f; + auto const& f = m_f; ReduceOps reduce_op; ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; @@ -297,13 +297,13 @@ public: void fillFab (BaseFab& levelset, const Geometry& geom, RunOn run_on, Box const& bounding_box) const noexcept { - const auto problo = geom.ProbLoArray(); - const auto dx = geom.CellSizeArray(); + const auto& problo = geom.ProbLoArray(); + const auto& dx = geom.CellSizeArray(); const Box& bx = levelset.box(); const auto& a = levelset.array(); - const auto blo = amrex::lbound(bounding_box); - const auto bhi = amrex::ubound(bounding_box); - auto f = m_f; + const auto& blo = amrex::lbound(bounding_box); + const auto& bhi = amrex::ubound(bounding_box); + const auto& f = m_f; AMREX_HOST_DEVICE_FOR_3D_FLAG(run_on, bx, i, j, k, { a(i,j,k) = f(AMREX_D_DECL(problo[0]+amrex::Clamp(i,blo.x,bhi.x)*dx[0], @@ -335,11 +335,11 @@ public: void fillFab_Cpu (BaseFab& levelset, const Geometry& geom, Box const& bounding_box) const noexcept { - const auto problo = geom.ProbLoArray(); - const auto dx = geom.CellSizeArray(); + const auto& problo = geom.ProbLoArray(); + const auto& dx = geom.CellSizeArray(); const Box& bx = levelset.box(); - const auto blo = amrex::lbound(bounding_box); - const auto bhi = amrex::ubound(bounding_box); + const auto& blo = amrex::lbound(bounding_box); + const auto& bhi = amrex::ubound(bounding_box); const auto& a = levelset.array(); amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept @@ -359,9 +359,9 @@ public: { auto const& dx = geom.CellSizeArray(); auto const& problo = geom.ProbLoArray(); - const auto blo = amrex::lbound(bounding_box); - const auto bhi = amrex::ubound(bounding_box); - auto f = m_f; + const auto& blo = amrex::lbound(bounding_box); + const auto& bhi = amrex::ubound(bounding_box); + auto const& f = m_f; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { Array4 const& inter = inter_arr[idim]; Array4 const& type = type_arr[idim]; @@ -430,8 +430,8 @@ public: { auto const& dx = geom.CellSizeArray(); auto const& problo = geom.ProbLoArray(); - const auto blo = amrex::lbound(bounding_box); - const auto bhi = amrex::ubound(bounding_box); + const auto& blo = amrex::lbound(bounding_box); + const auto& bhi = amrex::ubound(bounding_box); const Box bx{inter}; amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept { diff --git a/Src/EB/AMReX_EB2_MultiGFab.cpp b/Src/EB/AMReX_EB2_MultiGFab.cpp index 8d09ca7c069..8461b5618d4 100644 --- a/Src/EB/AMReX_EB2_MultiGFab.cpp +++ b/Src/EB/AMReX_EB2_MultiGFab.cpp @@ -1,7 +1,6 @@ #include #include -#include namespace amrex::EB2 { diff --git a/Src/EB/AMReX_EBDataCollection.cpp b/Src/EB/AMReX_EBDataCollection.cpp index 141f0614629..f9b3d66c79c 100644 --- a/Src/EB/AMReX_EBDataCollection.cpp +++ b/Src/EB/AMReX_EBDataCollection.cpp @@ -208,8 +208,8 @@ void EBDataCollection::extendDataOutsideDomain (IntVect const& level_ng) if (apbx.bigEnd(idim) == nbx.bigEnd(idim)) { apbx.growHi(idim,-1); } - auto lev_apidim_domain = lev_ap_domain[idim]; - Dim3 off = IntVect::TheDimensionVector(idim).dim3(); + auto const& lev_apidim_domain = lev_ap_domain[idim]; + Dim3 const& off = 
IntVect::TheDimensionVector(idim).dim3(); amrex::ParallelFor(apbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { diff --git a/Src/EB/AMReX_WriteEBSurface.cpp b/Src/EB/AMReX_WriteEBSurface.cpp index f26420cdcc7..2e94ccb7ffb 100644 --- a/Src/EB/AMReX_WriteEBSurface.cpp +++ b/Src/EB/AMReX_WriteEBSurface.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/Src/EB/AMReX_distFcnElement.H b/Src/EB/AMReX_distFcnElement.H index 1d7c2ef0597..d0c7b867138 100644 --- a/Src/EB/AMReX_distFcnElement.H +++ b/Src/EB/AMReX_distFcnElement.H @@ -18,11 +18,11 @@ class distFcnElement2d { // NOLINT(cppcoreguidelines-special-member-functions) [[nodiscard]] virtual amrex::Real cpdist(amrex::RealVect pt, amrex::RealVect& cp) const = 0; [[nodiscard]] virtual amrex::Real cpside(amrex::RealVect pt, amrex::RealVect& cp) const = 0; - static int solve_thomas(const std::vector &a, - std::vector b, - const std::vector &c, - std::vector d, - std::vector &X); + static int solve_thomas (const std::vector &a, + std::vector &b, + const std::vector &c, + std::vector &d, + std::vector &X); }; diff --git a/Src/EB/AMReX_distFcnElement.cpp b/Src/EB/AMReX_distFcnElement.cpp index b6040d30f53..c72bb6af8f9 100644 --- a/Src/EB/AMReX_distFcnElement.cpp +++ b/Src/EB/AMReX_distFcnElement.cpp @@ -6,11 +6,11 @@ namespace amrex { -int distFcnElement2d::solve_thomas(const std::vector &a, - std::vector b, - const std::vector &c, - std::vector d, - std::vector &x) +int distFcnElement2d::solve_thomas (const std::vector &a, + std::vector &b, + const std::vector &c, + std::vector &d, + std::vector &x) { int n = static_cast(d.size()); x.resize(n); diff --git a/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H b/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H index e4186ba247b..67725cba876 100644 --- a/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H +++ b/Src/Extern/Conduit/AMReX_Conduit_Blueprint_ParticlesI.H @@ -69,14 +69,14 @@ ParticleTileToBlueprint(const ParticleTile(&soa.GetRealData(0)[0]), + n_coords["values/x"].set_external(const_cast(soa.GetRealData(0).data()), num_particles); #if AMREX_SPACEDIM > 1 - n_coords["values/y"].set_external(const_cast(&soa.GetRealData(1)[0]), + n_coords["values/y"].set_external(const_cast(soa.GetRealData(1).data()), num_particles); #endif #if AMREX_SPACEDIM > 2 - n_coords["values/z"].set_external(const_cast(&soa.GetRealData(2)[0]), + n_coords["values/z"].set_external(const_cast(soa.GetRealData(2).data()), num_particles); #endif } else @@ -175,22 +175,12 @@ ParticleTileToBlueprint(const ParticleTile(&soa.GetIntData(0)[0]), - num_particles); - - // cpu is the second int entry - conduit::Node &n_f_cpu = n_fields[topology_name + "_cpu"]; - - n_f_cpu["topology"] = topology_name; - n_f_cpu["association"] = "element"; - n_f_cpu["values"].set_external(const_cast(&soa.GetIntData(0)[0]), - num_particles); + conduit::Node &n_f_idcpu = n_fields[topology_name + "_idcpu"]; + n_f_idcpu["topology"] = topology_name; + n_f_idcpu["association"] = "element"; + n_f_idcpu["values"].set_external(const_cast(soa.GetIdCPUData().data()), + num_particles); } // -------------------------------- @@ -232,7 +222,7 @@ ParticleTileToBlueprint(const ParticleTile(&soa.GetRealData(i)[0]), + n_f["values"].set_external(const_cast(soa.GetRealData(i).data()), num_particles); vname_real_idx++; @@ -244,7 +234,7 @@ ParticleTileToBlueprint(const ParticleTile(&soa.GetIntData(i)[0]), + n_f["values"].set_external(const_cast(soa.GetIntData(i).data()), num_particles); vname_int_idx++; diff --git 
a/Src/Extern/Conduit/CMakeLists.txt b/Src/Extern/Conduit/CMakeLists.txt index 75e84099f1d..07c4722ac4b 100644 --- a/Src/Extern/Conduit/CMakeLists.txt +++ b/Src/Extern/Conduit/CMakeLists.txt @@ -4,6 +4,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) # TODO: Particles PR merges another file target_sources(amrex_${D}d PRIVATE + AMReX_Conduit_Blueprint_ParticlesI.H AMReX_Conduit_Blueprint.H AMReX_Conduit_Blueprint.cpp ) diff --git a/Src/Extern/Conduit/Make.package b/Src/Extern/Conduit/Make.package index 2fc13b7d606..def60ba1a74 100644 --- a/Src/Extern/Conduit/Make.package +++ b/Src/Extern/Conduit/Make.package @@ -3,7 +3,7 @@ # CEXE_sources += AMReX_Conduit_Blueprint.cpp - +CEXE_headers += AMReX_Conduit_Blueprint_ParticlesI.H CEXE_headers += AMReX_Conduit_Blueprint.H VPATH_LOCATIONS += $(AMREX_HOME)/Src/Extern/Conduit diff --git a/Src/Extern/HDF5/AMReX_ParticleHDF5.H b/Src/Extern/HDF5/AMReX_ParticleHDF5.H index 00fdf0c14be..a1b9d8faeb8 100644 --- a/Src/Extern/HDF5/AMReX_ParticleHDF5.H +++ b/Src/Extern/HDF5/AMReX_ParticleHDF5.H @@ -504,7 +504,7 @@ ParticleContainer_impl -1) { + if (chunk_size > total_int_size) { + H5Pset_chunk(dcpl_int, 1, &total_int_size); + } + } + } + int_dset_space = H5Screate_simple(1, &total_int_size, NULL); #ifdef AMREX_USE_HDF5_ASYNC int_dset_id = H5Dcreate_async(grp, "data:datatype=0", H5T_NATIVE_INT, int_dset_space, H5P_DEFAULT, dcpl_int, H5P_DEFAULT, es_par_g); @@ -808,6 +809,14 @@ ParticleContainer_impl -1) { + if (chunk_size > total_real_size) { + H5Pset_chunk(dcpl_real, 1, &total_real_size); + } + } + } + real_dset_space = H5Screate_simple(1, &total_real_size, NULL); if (sizeof(typename ParticleType::RealType) == 4) { #ifdef AMREX_USE_HDF5_ASYNC @@ -837,7 +846,7 @@ ParticleContainer_impl hostfab; + if (fab.arena()->isManaged() || fab.arena()->isDevice()) { + hostfab = std::make_unique(fab.box(), fab.nComp(), + The_Pinned_Arena()); + Gpu::dtoh_memcpy_async(hostfab->dataPtr(), fab.dataPtr(), + fab.size()*sizeof(Real)); + Gpu::streamSynchronize(); + fabdata = hostfab->dataPtr(); + } +#endif if(doConvert) { RealDescriptor::convertFromNativeFormat(static_cast (a_buffer.dataPtr()+writeDataSize), - writeDataItems, fab.dataPtr(), *whichRD); + writeDataItems, fabdata, *whichRD); } else { // ---- copy from the fab memcpy(static_cast (a_buffer.dataPtr()+writeDataSize), - fab.dataPtr(), writeDataItems * whichRDBytes); + fabdata, writeDataItems * whichRDBytes); } writeDataSize += writeDataItems; } @@ -720,6 +731,16 @@ void WriteMultiLevelPlotfileHDF5SingleDset (const std::string& plotfilename, } #endif + // Force maximum chunk size to be size of write + if (H5Pget_layout(lev_dcpl_id) == H5D_CHUNKED) { + hsize_t chunk_size; + if (H5Pget_chunk(lev_dcpl_id, 1, &chunk_size) > -1) { + if (chunk_size > hs_allprocsize[0]) { + H5Pset_chunk(lev_dcpl_id, 1, hs_allprocsize); + } + } + } + #ifdef AMREX_USE_HDF5_ASYNC hid_t dataset = H5Dcreate_async(grp, dataname.c_str(), data_type, dataspace, H5P_DEFAULT, lev_dcpl_id, H5P_DEFAULT, es_id_g); #else @@ -897,18 +918,8 @@ void WriteMultiLevelPlotfileHDF5MultiDset (const std::string& plotfilename, H5Pset_fill_time(dcpl_id, H5D_FILL_TIME_NEVER); H5Pset_alloc_time(dcpl_id, H5D_ALLOC_TIME_INCR); -#if (defined AMREX_USE_HDF5_ZFP) || (defined AMREX_USE_HDF5_SZ) - const char *chunk_env = NULL; std::string mode_env, value_env; double comp_value = -1.0; - hsize_t chunk_dim = 1024; - - chunk_env = getenv("HDF5_CHUNK_SIZE"); - if (chunk_env != NULL) - chunk_dim = atoi(chunk_env); - - H5Pset_chunk(dcpl_id, 1, &chunk_dim); - std::string::size_type pos = 
compression.find('@'); if (pos != std::string::npos) { mode_env = compression.substr(0, pos); @@ -918,42 +929,56 @@ void WriteMultiLevelPlotfileHDF5MultiDset (const std::string& plotfilename, } } + if (!mode_env.empty() && mode_env != "None") { + const char *chunk_env = NULL; + hsize_t chunk_dim = 1024; + chunk_env = getenv("HDF5_CHUNK_SIZE"); + if (chunk_env != NULL) { + chunk_dim = atoi(chunk_env); + } + + H5Pset_chunk(dcpl_id, 1, &chunk_dim); + #ifdef AMREX_USE_HDF5_ZFP - pos = compression.find("ZFP"); - if (pos != std::string::npos) { - ret = H5Z_zfp_initialize(); - if (ret < 0) { amrex::Abort("ZFP initialize failed!"); } - } + pos = mode_env.find("ZFP"); + if (pos != std::string::npos) { + ret = H5Z_zfp_initialize(); + if (ret < 0) { amrex::Abort("ZFP initialize failed!"); } + } #endif + if (mode_env == "ZLIB") { + H5Pset_shuffle(dcpl_id); + H5Pset_deflate(dcpl_id, (int)comp_value); + } #ifdef AMREX_USE_HDF5_SZ - pos = compression.find("SZ"); - if (pos != std::string::npos) { - ret = H5Z_SZ_Init((char*)value_env.c_str()); - if (ret < 0) { amrex::Abort("ZFP initialize failed, check SZ config file!"); } - } + else if (mode_env == "SZ") { + ret = H5Z_SZ_Init((char*)value_env.c_str()); + if (ret < 0) { + std::cout << "SZ config file:" << value_env.c_str() << std::endl; + amrex::Abort("SZ initialize failed, check SZ config file!"); + } + } #endif - - if (!mode_env.empty() && mode_env != "None") { - if (mode_env == "ZLIB") - H5Pset_deflate(dcpl_id, (int)comp_value); #ifdef AMREX_USE_HDF5_ZFP - else if (mode_env == "ZFP_RATE") + else if (mode_env == "ZFP_RATE") { H5Pset_zfp_rate(dcpl_id, comp_value); - else if (mode_env == "ZFP_PRECISION") + } + else if (mode_env == "ZFP_PRECISION") { H5Pset_zfp_precision(dcpl_id, (unsigned int)comp_value); - else if (mode_env == "ZFP_ACCURACY") + } + else if (mode_env == "ZFP_ACCURACY") { H5Pset_zfp_accuracy(dcpl_id, comp_value); - else if (mode_env == "ZFP_REVERSIBLE") + } + else if (mode_env == "ZFP_REVERSIBLE") { H5Pset_zfp_reversible(dcpl_id); + } #endif - if (ParallelDescriptor::MyProc() == 0) { std::cout << "\nHDF5 checkpoint using " << mode_env << ", " << value_env << ", " << chunk_dim << std::endl; } } -#endif BL_PROFILE_VAR("H5writeAllLevel", h5dwd); @@ -1178,6 +1203,15 @@ void WriteMultiLevelPlotfileHDF5MultiDset (const std::string& plotfilename, hid_t dataspace = H5Screate_simple(1, hs_allprocsize, NULL); snprintf(dataname, sizeof dataname, "data:datatype=%d", jj); + // Force maximum chunk size to be size of write + if (H5Pget_layout(lev_dcpl_id) == H5D_CHUNKED) { + hsize_t chunk_size; + if (H5Pget_chunk(lev_dcpl_id, 1, &chunk_size) > -1) { + if (chunk_size > hs_allprocsize[0]) { + H5Pset_chunk(lev_dcpl_id, 1, hs_allprocsize); + } + } + } #ifdef AMREX_USE_HDF5_ASYNC dataset = H5Dcreate_async(grp, dataname, data_type, dataspace, H5P_DEFAULT, lev_dcpl_id, H5P_DEFAULT, es_id_g); if(dataset < 0) { std::cout << ParallelDescriptor::MyProc() << "create data failed! 
ret = " << dataset << std::endl; } diff --git a/Src/Extern/HDF5/AMReX_WriteBinaryParticleDataHDF5.H b/Src/Extern/HDF5/AMReX_WriteBinaryParticleDataHDF5.H index 271f2e0e4cd..462716ae2a3 100644 --- a/Src/Extern/HDF5/AMReX_WriteBinaryParticleDataHDF5.H +++ b/Src/Extern/HDF5/AMReX_WriteBinaryParticleDataHDF5.H @@ -385,7 +385,7 @@ void WriteHDF5ParticleDataSync (PC const& pc, int nOutFiles(-1); ParmParse pp("particles"); - pp.queryAdd("particles_nfiles",nOutFiles); + pp.query("particles_nfiles",nOutFiles); if(nOutFiles == -1) nOutFiles = NProcs; /* nOutFiles = std::max(1, std::min(nOutFiles,NProcs)); */ pc.nOutFilesPrePost = nOutFiles; diff --git a/Src/Extern/HYPRE/AMReX_HypreABecLap.H b/Src/Extern/HYPRE/AMReX_HypreABecLap.H index 1d57d942a14..9b0685de434 100644 --- a/Src/Extern/HYPRE/AMReX_HypreABecLap.H +++ b/Src/Extern/HYPRE/AMReX_HypreABecLap.H @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/Src/Extern/HYPRE/AMReX_HypreABecLap.cpp b/Src/Extern/HYPRE/AMReX_HypreABecLap.cpp index 4607fcc1fca..2d4c313c086 100644 --- a/Src/Extern/HYPRE/AMReX_HypreABecLap.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreABecLap.cpp @@ -115,7 +115,7 @@ HypreABecLap::getSolution (MultiFab& a_soln) auto reghi = Hypre::hiV(reg); HYPRE_StructVectorGetBoxValues(x, reglo.data(), reghi.data(), (*soln)[mfi].dataPtr()); } - Gpu::synchronize(); + Gpu::hypreSynchronize(); if (a_soln.nGrowVect() != 0) { MultiFab::Copy(a_soln, tmp, 0, 0, 1, 0); @@ -235,7 +235,7 @@ HypreABecLap::prepareSolver () HYPRE_StructMatrixSetBoxValues(A, reglo.data(), reghi.data(), regular_stencil_size, stencil_indices.data(), mat); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } HYPRE_StructMatrixAssemble(A); @@ -299,7 +299,7 @@ HypreABecLap::loadVectors (MultiFab& soln, const MultiFab& rhs) HYPRE_StructVectorSetBoxValues(x, reglo.data(), reghi.data(), soln[mfi].dataPtr()); HYPRE_StructVectorSetBoxValues(b, reglo.data(), reghi.data(), rhs_diag[mfi].dataPtr()); } - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } diff --git a/Src/Extern/HYPRE/AMReX_HypreABecLap2.cpp b/Src/Extern/HYPRE/AMReX_HypreABecLap2.cpp index 1b7ba8e895b..a44d2d517e3 100644 --- a/Src/Extern/HYPRE/AMReX_HypreABecLap2.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreABecLap2.cpp @@ -137,7 +137,7 @@ HypreABecLap2::getSolution (MultiFab& a_soln) HYPRE_SStructVectorGetBoxValues(x, part, reglo.data(), reghi.data(), 0, (*soln)[mfi].dataPtr()); } - Gpu::synchronize(); + Gpu::hypreSynchronize(); if (a_soln.nGrowVect() != 0) { MultiFab::Copy(a_soln, tmp, 0, 0, 1, 0); @@ -262,7 +262,7 @@ HypreABecLap2::prepareSolver () HYPRE_SStructMatrixSetBoxValues(A, part, reglo.data(), reghi.data(), 0, regular_stencil_size, stencil_indices.data(), mat); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } HYPRE_SStructMatrixAssemble(A); @@ -335,7 +335,7 @@ HypreABecLap2::loadVectors (MultiFab& soln, const MultiFab& rhs) HYPRE_SStructVectorSetBoxValues(b, part, reglo.data(), reghi.data(), 0, rhs_diag[mfi].dataPtr()); } - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } diff --git a/Src/Extern/HYPRE/AMReX_HypreABecLap3.cpp b/Src/Extern/HYPRE/AMReX_HypreABecLap3.cpp index d04b85c314b..216cf368f5a 100644 --- a/Src/Extern/HYPRE/AMReX_HypreABecLap3.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreABecLap3.cpp @@ -77,7 +77,7 @@ HypreABecLap3::getSolution (MultiFab& a_soln) (*l_soln)[mfi].setVal(0.0); } } - Gpu::synchronize(); + Gpu::hypreSynchronize(); if (use_tmp_mf) { MultiFab::Copy(a_soln, tmp, 0, 0, 1, 0); @@ -499,9 +499,9 @@ HypreABecLap3::prepareSolver () }); } - Gpu::synchronize(); + 
Gpu::streamSynchronize(); HYPRE_IJMatrixSetValues(A,nrows,ncols,rows,cols,mat); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } HYPRE_IJMatrixAssemble(A); @@ -681,7 +681,7 @@ HypreABecLap3::loadVectors (MultiFab& soln, const MultiFab& rhs) HYPRE_IJVectorSetValues(b, nrows, cell_id_vec[mfi].dataPtr(), rhs_diag[mfi].dataPtr()); } } - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } // namespace amrex diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H index 04147207d36..6f687766b86 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.H @@ -39,6 +39,7 @@ public: void setVerbose (int v) { m_verbose = v; } void setMaxIter (int v) { m_maxiter = v; } + void setIsSingular (bool v) { m_is_singular = v; } void setup (Real a_ascalar, Real a_bscalar, Vector const& a_acoefs, @@ -65,6 +66,7 @@ private: int m_verbose = 0; int m_maxiter = 200; + bool m_is_singular = false; Vector m_geom; Vector m_grids; @@ -87,6 +89,7 @@ private: Vector> m_bndry; Vector> m_bndry_rhs; Vector m_fine_masks; + Vector m_crse_masks; // For coarse cells at coarse/fine interface. The vector is for AMR // levels. diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp index d7621c6bd0e..6f5718f826b 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap.cpp @@ -57,11 +57,16 @@ HypreMLABecLap::HypreMLABecLap (Vector a_geom, } m_fine_masks.resize(m_nlevels-1); + m_crse_masks.resize(m_nlevels-1); for (int ilev = 0; ilev < m_nlevels-1; ++ilev) { m_fine_masks[ilev] = amrex::makeFineMask(m_grids[ilev], m_dmap[ilev], IntVect(1), m_grids[ilev+1], m_ref_ratio[ilev], m_geom[ilev].periodicity(), 0, 1); + m_crse_masks[ilev].define(m_grids[ilev], m_dmap[ilev], 1, 1); + m_crse_masks[ilev].BuildMask(m_geom[ilev].Domain(), + m_geom[ilev].periodicity(), + 1, 0, 0, 1); } m_c2f_offset_from.resize(m_nlevels-1); @@ -406,6 +411,15 @@ void HypreMLABecLap::addNonStencilEntriesToGraph () m_f2c_offset.resize(m_nlevels-1); m_f2c_values.resize(m_nlevels-1); + Vector period(m_nlevels); + Vector smallend(m_nlevels); + Vector bigend(m_nlevels); + for (int ilev = 0; ilev (entry); auto const to_level = std::get<3>(entry); @@ -419,6 +433,15 @@ void HypreMLABecLap::addNonStencilEntriesToGraph () GpuArray to_index{AMREX_D_DECL(to_iv[0], to_iv[1], to_iv[2])}; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (m_geom[0].isPeriodic(idim)) { + if (to_index[idim] < smallend[to_level][idim]) { + to_index[idim] += period[to_level][idim]; + } else if (to_index[idim] > bigend[to_level][idim]) { + to_index[idim] -= period[to_level][idim]; + } + } + } constexpr int ivar = 0; HYPRE_SStructGraphAddEntries(m_ss_graph, from_level, from_index.data(), ivar, @@ -588,12 +611,19 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, const auto boxlo = amrex::lbound(vbx); const auto boxhi = amrex::ubound(vbx); // Set up stencil part of the matrix + auto fixed_pt = IntVect::TheMaxVector(); + if (m_is_singular && m_nlevels-1 == ilev) { + auto const& box0 = m_grids.back()[0]; + fixed_pt = box0.smallEnd() + 1; + // This cell does not have any non-stencil entries. So it's + // a good point for fixing singularity. 
+ } amrex::fill(matfab, [=] AMREX_GPU_HOST_DEVICE (GpuArray& sten, int i, int j, int k) { hypmlabeclap_mat(sten, i, j, k, boxlo, boxhi, sa, afab, sb, dx, bfabs, - bctype, bcl, bcmsk, bcval, bcrhs, ilev); + bctype, bcl, bcmsk, bcval, bcrhs, ilev, fixed_pt); }); bool need_sync = true; @@ -636,6 +666,7 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, auto const& c2f_offset_to_a = m_c2f_offset_to[ilev].const_array(mfi); auto const& mat_a = matfab.array(); auto const& fine_mask = m_fine_masks[ilev].const_array(mfi); + auto const& crse_mask = m_crse_masks[ilev].const_array(mfi); AMREX_D_TERM(auto offset_bx_a = m_offset_cf_bcoefs[ilev][0].isDefined() ? m_offset_cf_bcoefs[ilev][0].const_array(mfi) : Array4{};, @@ -664,7 +695,7 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, c2f_offset_to_a, dx, sb, AMREX_D_DECL(offset_bx_a,offset_by_a,offset_bz_a), AMREX_D_DECL(p_bx, p_by, p_bz), - fine_mask,rr); + fine_mask,rr, crse_mask); }); if (c2f_total_from > 0) { #ifdef AMREX_USE_GPU @@ -838,8 +869,8 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, HYPRE_SStructSSAMGSetNumPostRelax(m_ss_solver, 4); HYPRE_SStructSSAMGSetNumCoarseRelax(m_ss_solver, 4); - HYPRE_SStructSSAMGSetLogging(m_ss_solver, m_verbose); - HYPRE_SStructSSAMGSetPrintLevel(m_ss_solver, m_verbose); + HYPRE_SStructSSAMGSetLogging(m_ss_solver, 1); + // HYPRE_SStructSSAMGSetPrintLevel(m_ss_solver, 1); /* 0: no, 1: setup, 2: solve, 3:both // HYPRE_SStructSSAMGSetup(m_ss_solver, A, b, x); @@ -854,15 +885,15 @@ void HypreMLABecLap::setup (Real a_ascalar, Real a_bscalar, HYPRE_BoomerAMGCreate(&m_solver); HYPRE_BoomerAMGSetOldDefault(m_solver); // Falgout coarsening with modified classical interpolation - HYPRE_BoomerAMGSetStrongThreshold(m_solver, (AMREX_SPACEDIM == 3) ? 0.6 : 0.25); // default is 0.25 + HYPRE_BoomerAMGSetStrongThreshold(m_solver, (AMREX_SPACEDIM == 3) ? 0.4 : 0.25); // default is 0.25 HYPRE_BoomerAMGSetRelaxOrder(m_solver, 1); /* 0: default, natural order, 1: C/F relaxation order */ HYPRE_BoomerAMGSetNumSweeps(m_solver, 2); /* Sweeps on fine levels */ // HYPRE_BoomerAMGSetFCycle(m_solver, 1); // default is 0 // HYPRE_BoomerAMGSetCoarsenType(m_solver, 6); // HYPRE_BoomerAMGSetRelaxType(m_solver, 6); /* G-S/Jacobi hybrid relaxation */ - HYPRE_BoomerAMGSetLogging(m_solver, m_verbose); - HYPRE_BoomerAMGSetPrintLevel(m_solver, m_verbose); + HYPRE_BoomerAMGSetLogging(m_solver, 1); + // HYPRE_BoomerAMGSetPrintLevel(m_solver, 1); /* 0: no, 1: setup, 2: solve, 3:both HYPRE_ParCSRMatrix par_A; HYPRE_SStructMatrixGetObject(m_ss_A, (void**) &par_A); @@ -956,6 +987,9 @@ void HypreMLABecLap::solve (Vector const& a_sol, Vector 0"); } + HYPRE_Int num_iterations; + Real final_res; + #ifdef AMREX_FEATURE_HYPRE_SSAMG if (m_hypre_solver_id == HypreSolverID::SSAMG) { @@ -965,15 +999,13 @@ void HypreMLABecLap::solve (Vector const& a_sol, Vector const& a_sol, Vector reltol) { + amrex::Abort("Hypre failed to converge after "+std::to_string(num_iterations)+ + " iterations. Final relative residual is "+std::to_string(final_res)); + } } // Get solution @@ -1044,8 +1079,6 @@ void HypreMLABecLap::solve (Vector const& a_sol, Vector= 0; --ilev) { amrex::average_down(*a_sol[ilev+1], *a_sol[ilev], 0, ncomp, m_ref_ratio[ilev]); } - - // xxxxx abort if convergence is not reached. 
} #ifdef AMREX_USE_GPU diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H index 57a37f19bf3..7d083e7d98c 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_2D_K.H @@ -109,7 +109,7 @@ void hypmlabeclap_c2f (int i, int j, int k, Array4 const& offset_by, Real const* bx, Real const* by, Array4 const& fine_mask, - IntVect const& rr) + IntVect const& rr, Array4 const& crse_mask) { if (fine_mask(i,j,k)) { // Let's set off-diagonal elements to zero @@ -159,9 +159,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real xInt = Real(-0.5) + (irx+Real(0.5))/Real(rr[0]); Real xc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { poly_interp_coeff<2>(xInt, &(xc[1]), &(ct[1])); - } else if (fine_mask(i+1,j,k)) { + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) + { poly_interp_coeff<2>(xInt, xc, ct); } else { poly_interp_coeff<3>(xInt, xc, ct); @@ -202,9 +206,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real yInt = Real(-0.5) + (iry+Real(0.5))/Real(rr[1]); Real yc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { poly_interp_coeff<2>(yInt, &(yc[1]), &(ct[1])); - } else if (fine_mask(i,j+1,k)) { + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) + { poly_interp_coeff<2>(yInt, yc, ct); } else { poly_interp_coeff<3>(yInt, yc, ct); @@ -244,9 +252,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real yInt = Real(-0.5) + (iry+Real(0.5))/Real(rr[1]); Real yc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { poly_interp_coeff<2>(yInt, &(yc[1]), &(ct[1])); - } else if (fine_mask(i,j+1,k)) { + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) + { poly_interp_coeff<2>(yInt, yc, ct); } else { poly_interp_coeff<3>(yInt, yc, ct); @@ -286,9 +298,13 @@ void hypmlabeclap_c2f (int i, int j, int k, Real xInt = Real(-0.5) + (irx+Real(0.5))/Real(rr[0]); Real xc[3] = {Real(-1.0), Real(0.0), Real(1.0)}; Real ct[3] = {Real(0.0), Real(0.0), Real(0.0)}; - if (fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { poly_interp_coeff<2>(xInt, &(xc[1]), &(ct[1])); - } else if (fine_mask(i+1,j,k)) { + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) + { poly_interp_coeff<2>(xInt, xc, ct); } else { poly_interp_coeff<3>(xInt, xc, ct); diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H index 8e6e1a39b14..431650236f7 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_3D_K.H @@ -166,7 +166,7 @@ void hypmlabeclap_c2f (int i, int j, int k, Array4 const& offset_bz, Real const* bx, Real const* by, Real const* bz, Array4 const& fine_mask, - IntVect const& rr) + IntVect const& rr, Array4 const& crse_mask) { if (fine_mask(i,j,k)) { // Let's set off-diagonal elements to zero @@ -191,7 +191,11 @@ void hypmlabeclap_c2f (int i, int j, int k, (! fine_mask(i,j-1,k-1)) && (! fine_mask(i,j+1,k-1)) && (! fine_mask(i,j-1,k+1)) && - (! fine_mask(i,j+1,k+1))) + (! 
fine_mask(i,j+1,k+1)) && + ( crse_mask(i,j-1,k-1)) && + ( crse_mask(i,j+1,k-1)) && + ( crse_mask(i,j-1,k+1)) && + ( crse_mask(i,j+1,k+1))) { corner[0] = true; } @@ -199,7 +203,11 @@ void hypmlabeclap_c2f (int i, int j, int k, (! fine_mask(i-1,j,k-1)) && (! fine_mask(i+1,j,k-1)) && (! fine_mask(i-1,j,k+1)) && - (! fine_mask(i+1,j,k+1))) + (! fine_mask(i+1,j,k+1)) && + ( crse_mask(i-1,j,k-1)) && + ( crse_mask(i+1,j,k-1)) && + ( crse_mask(i-1,j,k+1)) && + ( crse_mask(i+1,j,k+1))) { corner[1] = true; } @@ -207,7 +215,11 @@ void hypmlabeclap_c2f (int i, int j, int k, (! fine_mask(i-1,j-1,k)) && (! fine_mask(i+1,j-1,k)) && (! fine_mask(i-1,j+1,k)) && - (! fine_mask(i+1,j+1,k))) + (! fine_mask(i+1,j+1,k)) && + ( crse_mask(i-1,j-1,k)) && + ( crse_mask(i+1,j-1,k)) && + ( crse_mask(i-1,j+1,k)) && + ( crse_mask(i+1,j+1,k))) { corner[2] = true; } @@ -253,28 +265,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -322,28 +340,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += 
fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -393,28 +417,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -463,28 +493,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -534,28 +570,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += 
Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j,k-1) && !fine_mask(i,j,k+1)) { - s0 -= z*z; - stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); - stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); - } else if (!fine_mask(i,j,k-1)) { + if ( fine_mask(i,j,k-1) || + !crse_mask(i,j,k-1)) + { + s0 += Real(-0.5)*z; + stencil(i,j,k)[6] += fac0*Real(0.5)*z; + } else if ( fine_mask(i,j,k+1) || + !crse_mask(i,j,k+1)) { s0 += Real(0.5)*z; stencil(i,j,k)[5] += fac0*Real(-0.5)*z; } else { - s0 += Real(-0.5)*z; - stencil(i,j,k)[6] += fac0*Real(0.5)*z; + s0 -= z*z; + stencil(i,j,k)[5] += fac0*Real(0.5)*z*(z-Real(1.0)); + stencil(i,j,k)[6] += fac0*Real(0.5)*z*(z+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; @@ -605,28 +647,34 @@ void hypmlabeclap_c2f (int i, int j, int k, Real fac0 = fac*cc[0]; Real s0 = Real(1.0); - if (!fine_mask(i-1,j,k) && !fine_mask(i+1,j,k)) { - s0 -= x*x; - stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); - stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); - } else if (!fine_mask(i-1,j,k)) { + if ( fine_mask(i-1,j,k) || + !crse_mask(i-1,j,k)) + { + s0 += Real(-0.5)*x; + stencil(i,j,k)[2] += fac0*Real(0.5)*x; + } else if ( fine_mask(i+1,j,k) || + !crse_mask(i+1,j,k)) { s0 += Real(0.5)*x; stencil(i,j,k)[1] += fac0*Real(-0.5)*x; } else { - s0 += Real(-0.5)*x; - stencil(i,j,k)[2] += fac0*Real(0.5)*x; + s0 -= x*x; + stencil(i,j,k)[1] += fac0*Real(0.5)*x*(x-Real(1.0)); + stencil(i,j,k)[2] += fac0*Real(0.5)*x*(x+Real(1.0)); } - if (!fine_mask(i,j-1,k) && !fine_mask(i,j+1,k)) { - s0 -= y*y; - stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); - stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); - } else if (!fine_mask(i,j-1,k)) { + if ( fine_mask(i,j-1,k) || + !crse_mask(i,j-1,k)) + { + s0 += Real(-0.5)*y; + stencil(i,j,k)[4] += fac0*Real(0.5)*y; + } else if ( fine_mask(i,j+1,k) || + !crse_mask(i,j+1,k)) { s0 += Real(0.5)*y; stencil(i,j,k)[3] += fac0*Real(-0.5)*y; } else { - s0 += Real(-0.5)*y; - stencil(i,j,k)[4] += fac0*Real(0.5)*y; + s0 -= y*y; + stencil(i,j,k)[3] += fac0*Real(0.5)*y*(y-Real(1.0)); + stencil(i,j,k)[4] += fac0*Real(0.5)*y*(y+Real(1.0)); } stencil(i,j,k)[0] += fac0*s0; diff --git a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H index ea38bf5037f..129a6a989a8 100644 --- a/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H +++ b/Src/Extern/HYPRE/AMReX_HypreMLABecLap_K.H @@ -22,7 +22,7 @@ void hypmlabeclap_mat (GpuArray& sten, int i, int j, in GpuArray, AMREX_SPACEDIM*2> const& bcmsk, GpuArray, AMREX_SPACEDIM*2> const& bcval, GpuArray, AMREX_SPACEDIM*2> const& bcrhs, - int level) + int level, IntVect const& fixed_pt) { Real bxm = b[0] ? b[0](i ,j ,k ) : Real(1.0); Real bxp = b[0] ? 
b[0](i+1,j ,k ) : Real(1.0); @@ -223,6 +223,12 @@ void hypmlabeclap_mat (GpuArray& sten, int i, int j, in } #endif + + if (fixed_pt == IntVect(AMREX_D_DECL(i,j,k))) { + for (int n = 1; n < 2*AMREX_SPACEDIM+1; ++n) { + sten[n] = Real(0.0); + } + } } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp b/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp index bb4bc557ca7..33e45ca0667 100644 --- a/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreNodeLap.cpp @@ -123,9 +123,9 @@ HypreNodeLap::HypreNodeLap (const BoxArray& grids_, const DistributionMapping& d adjust_singular_matrix(ncols, cols, rows, mat); } - Gpu::synchronize(); + Gpu::streamSynchronize(); HYPRE_IJMatrixSetValues(A, nrows, ncols, rows, cols, mat); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } HYPRE_IJMatrixAssemble(A); @@ -324,9 +324,9 @@ HypreNodeLap::loadVectors (MultiFab& soln, const MultiFab& rhs) }); } - Gpu::synchronize(); + Gpu::streamSynchronize(); HYPRE_IJVectorSetValues(b, nrows, rows_vec.data(), bvec.data()); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } } @@ -347,7 +347,7 @@ HypreNodeLap::getSolution (MultiFab& soln) xvec.resize(nrows); Real* xp = xvec.data(); HYPRE_IJVectorGetValues(x, nrows, rows_vec.data(), xp); - Gpu::synchronize(); + Gpu::hypreSynchronize(); const Box& bx = mfi.validbox(); const auto& xfab = tmpsoln.array(mfi); @@ -359,7 +359,7 @@ HypreNodeLap::getSolution (MultiFab& soln) } }); - Gpu::synchronize(); + Gpu::streamSynchronize(); } } diff --git a/Src/Extern/HYPRE/AMReX_HypreSolver.H b/Src/Extern/HYPRE/AMReX_HypreSolver.H index 8f34b72f2df..fb70c90bbbe 100644 --- a/Src/Extern/HYPRE/AMReX_HypreSolver.H +++ b/Src/Extern/HYPRE/AMReX_HypreSolver.H @@ -569,7 +569,7 @@ HypreSolver::fill_matrix (Filler const& filler) Gpu::streamSynchronize(); HYPRE_IJMatrixSetValues(m_A, nrows, ncols_vec.data(), rows, cols_vec.data(), mat_vec.data()); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } HYPRE_IJMatrixAssemble(m_A); @@ -653,7 +653,7 @@ HypreSolver::load_vectors (Vector const& a_soln, Gpu::streamSynchronize(); HYPRE_IJVectorSetValues(m_x, nrows, rows, xp); HYPRE_IJVectorSetValues(m_b, nrows, rows, bp); - Gpu::synchronize(); + Gpu::hypreSynchronize(); } } } @@ -683,7 +683,7 @@ HypreSolver::get_solution (Vector const& a_soln) HYPRE_Int const* rows = m_global_id_vec[mfi].data(); HYPRE_IJVectorGetValues(m_x, nrows, rows, xp); - Gpu::synchronize(); + Gpu::hypreSynchronize(); HYPRE_Int offset = 0; for (int ivar = 0; ivar < m_nvars; ++ivar) { diff --git a/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H b/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H index 30ff30a499b..47a028f852d 100644 --- a/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H +++ b/Src/Extern/SUNDIALS/AMReX_SundialsIntegrator.H @@ -428,7 +428,6 @@ public: T S_rhs; unpack_vector(y_rhs, S_rhs); - BaseT::pre_rhs_action(S_data, rhs_time); BaseT::Rhs(S_rhs, S_data, rhs_time); return 0; @@ -443,7 +442,6 @@ public: T S_rhs; unpack_vector(y_rhs, S_rhs); - BaseT::pre_rhs_action(S_data, rhs_time); BaseT::RhsIm(S_rhs, S_data, rhs_time); return 0; @@ -458,7 +456,6 @@ public: T S_rhs; unpack_vector(y_rhs, S_rhs); - BaseT::pre_rhs_action(S_data, rhs_time); BaseT::RhsEx(S_rhs, S_data, rhs_time); return 0; @@ -473,7 +470,6 @@ public: T S_rhs; unpack_vector(y_rhs, S_rhs); - BaseT::pre_rhs_action(S_data, rhs_time); BaseT::RhsFast(S_rhs, S_data, rhs_time); return 0; diff --git a/Src/Extern/amrdata/AMReX_AmrData.H b/Src/Extern/amrdata/AMReX_AmrData.H index e62ce6b1549..dcb9253fdc5 100644 --- 
a/Src/Extern/amrdata/AMReX_AmrData.H +++ b/Src/Extern/amrdata/AMReX_AmrData.H @@ -16,8 +16,6 @@ namespace amrex { -class Interpolater; - class AmrData { protected: @@ -182,8 +180,6 @@ class AmrData { // fill on interior by piecewise constant interpolation void FillInterior(FArrayBox &dest, int level, const Box &subbox); - void Interp(FArrayBox &fine, FArrayBox &crse, - const Box &fine_box, int lrat); void PcInterp(FArrayBox &fine, const FArrayBox &crse, const Box &subbox, int lrat); FArrayBox *ReadGrid(std::istream &is, int numVar); diff --git a/Src/Extern/amrdata/AMReX_AmrData.cpp b/Src/Extern/amrdata/AMReX_AmrData.cpp index 9717f36a90c..a71d1a6b827 100644 --- a/Src/Extern/amrdata/AMReX_AmrData.cpp +++ b/Src/Extern/amrdata/AMReX_AmrData.cpp @@ -34,73 +34,6 @@ using std::ifstream; #define VSHOWVAL(verbose, val) { if(verbose) { \ cout << #val << " = " << val << endl; } } - -#if defined( BL_FORT_USE_UPPERCASE ) -# if (BL_SPACEDIM == 1) -# define FORT_PCINTERP PCINTERP1D -# elif (BL_SPACEDIM == 2) -# define FORT_CINTERP CINTERP2D -# define FORT_PCINTERP PCINTERP2D -# define FORT_CARTGRIDMINMAX CARTGRIDMINMAX2D -# elif (BL_SPACEDIM == 3) -# define FORT_CINTERP CINTERP3D -# define FORT_PCINTERP PCINTERP3D -# define FORT_CARTGRIDMINMAX CARTGRIDMINMAX3D -# endif -#elif defined( BL_FORT_USE_LOWERCASE ) -# if (BL_SPACEDIM == 1) -# define FORT_PCINTERP pcinterp1d -# elif (BL_SPACEDIM == 2) -# define FORT_CINTERP cinterp2d -# define FORT_PCINTERP pcinterp2d -# define FORT_CARTGRIDMINMAX cartgridminmax2d -# elif (BL_SPACEDIM == 3) -# define FORT_CINTERP cinterp3d -# define FORT_PCINTERP pcinterp3d -# define FORT_CARTGRIDMINMAX cartgridminmax3d -# endif -#else -# if (BL_SPACEDIM == 1) -# define FORT_PCINTERP pcinterp1d_ -# elif (BL_SPACEDIM == 2) -# define FORT_CINTERP cinterp2d_ -# define FORT_PCINTERP pcinterp2d_ -# define FORT_CARTGRIDMINMAX cartgridminmax2d_ -# elif (BL_SPACEDIM == 3) -# define FORT_CINTERP cinterp3d_ -# define FORT_PCINTERP pcinterp3d_ -# define FORT_CARTGRIDMINMAX cartgridminmax3d_ -# endif -#endif - - -extern "C" { -#if (BL_SPACEDIM != 1) - void FORT_CINTERP(amrex::Real *fine, AMREX_ARLIM_P(flo), AMREX_ARLIM_P(fhi), - const int *fblo, const int *fbhi, - const int &nvar, const int &lratio, - const amrex::Real *crse, const int &clo, const int &chi, - const int *cslo, const int *cshi, - const int *fslo, const int *fshi, - amrex::Real *cslope, const int &c_len, - amrex::Real *fslope, amrex::Real *fdat, const int &f_len, - amrex::Real *foff); -#endif - - void FORT_PCINTERP(amrex::Real *fine, AMREX_ARLIM_P(flo), AMREX_ARLIM_P(fhi), - const int *fblo, const int *fbhi, - const int &lrat, const int &nvar, - const amrex::Real *crse, AMREX_ARLIM_P(clo), AMREX_ARLIM_P(chi), - const int *cblo, const int *cbhi, - amrex::Real *temp, const int &tlo, const int &thi); - -#if (BL_SPACEDIM != 1) - void FORT_CARTGRIDMINMAX (amrex::Real *data, AMREX_ARLIM_P(dlo), AMREX_ARLIM_P(dhi), - const amrex::Real *vfrac, const amrex::Real &vfeps, - amrex::Real &dmin, amrex::Real &dmax); -#endif -} - namespace amrex { bool AmrData::verbose = false; @@ -1775,7 +1708,7 @@ bool AmrData::MinMax(const Box &onBox, const string &derived, int level, bool valid(false); // does onBox intersect any grids (are minmax valid) Real minVal, maxVal; dataMin = std::numeric_limits::max(); - dataMax = -std::numeric_limits::max(); + dataMax = std::numeric_limits::lowest(); Box overlap; // our strategy here is to use the VisMF min and maxes if possible @@ -1833,22 +1766,21 @@ bool AmrData::MinMax(const Box &onBox, const string 
&derived, int level, if(visMFMin < dataMin || visMFMax > dataMax) { // do it the hard way DefineFab(level, compIndex, gdx); DefineFab(level, vfIndex, gdx); - Real *ddat = (*dataGrids[level][compIndex])[gpli].dataPtr(); - Real *vdat = (*dataGrids[level][vfIndex])[gpli].dataPtr(); - const int *dlo = (*dataGrids[level][compIndex])[gpli].loVect(); - const int *dhi = (*dataGrids[level][compIndex])[gpli].hiVect(); - overlap = onBox; overlap &= gpli.validbox(); Real vfMaxVal = (*dataGrids[level][vfIndex])[gpli].max(overlap, 0); if(vfMaxVal >= vfEps[level]) { ++cCountMixedFort; valid = true; - - FORT_CARTGRIDMINMAX(ddat, AMREX_ARLIM(dlo), AMREX_ARLIM(dhi), vdat, vfEps[level], - minVal, maxVal); - dataMin = std::min(dataMin, minVal); - dataMax = std::max(dataMax, maxVal); + auto const& da = (*dataGrids[level][compIndex])[gpli].const_array(); + auto const& va = (*dataGrids[level][vfIndex])[gpli].const_array(); + amrex::LoopOnCpu((*dataGrids[level][compIndex])[gpli].box(), [&] (int i, int j, int k) + { + if (va(i,j,k) >= vfEps[level]) { + dataMin = std::min(dataMin, da(i,j,k)); + dataMax = std::max(dataMax, da(i,j,k)); + } + }); } } else { ++cCountMixedSkipped; @@ -1861,22 +1793,21 @@ bool AmrData::MinMax(const Box &onBox, const string &derived, int level, if(visMFMin < dataMin || visMFMax > dataMax) { // do it the hard way DefineFab(level, compIndex, gdx); DefineFab(level, vfIndex, gdx); - Real *ddat = (*dataGrids[level][compIndex])[gpli].dataPtr(); - Real *vdat = (*dataGrids[level][vfIndex])[gpli].dataPtr(); - const int *dlo = (*dataGrids[level][compIndex])[gpli].loVect(); - const int *dhi = (*dataGrids[level][compIndex])[gpli].hiVect(); - overlap = onBox; overlap &= gpli.validbox(); Real vfMaxVal = (*dataGrids[level][vfIndex])[gpli].max(overlap, 0); if(vfMaxVal >= vfEps[level]) { ++iCountMixedFort; valid = true; - - FORT_CARTGRIDMINMAX(ddat, AMREX_ARLIM(dlo), AMREX_ARLIM(dhi), vdat, vfEps[level], - minVal, maxVal); - dataMin = std::min(dataMin, minVal); - dataMax = std::max(dataMax, maxVal); + auto const& da = (*dataGrids[level][compIndex])[gpli].const_array(); + auto const& va = (*dataGrids[level][vfIndex])[gpli].const_array(); + amrex::LoopOnCpu((*dataGrids[level][compIndex])[gpli].box(), [&] (int i, int j, int k) + { + if (va(i,j,k) >= vfEps[level]) { + dataMin = std::min(dataMin, da(i,j,k)); + dataMax = std::max(dataMax, da(i,j,k)); + } + }); } else { ++iCountAllBody; } @@ -1963,58 +1894,6 @@ int AmrData::StateNumber(const string &statename) const { } -// --------------------------------------------------------------- -void AmrData::Interp(FArrayBox &fine, FArrayBox &crse, - const Box &fine_box, int lrat) -{ -#if (BL_SPACEDIM == 1) - amrex::ignore_unused(fine, crse, fine_box, lrat); - amrex::Abort("AmrData::MinMax: should not be here for 1d."); -#else - BL_ASSERT(fine.box().contains(fine_box)); - Box crse_bx(amrex::coarsen(fine_box,lrat)); - Box fslope_bx(amrex::refine(crse_bx,lrat)); - Box cslope_bx(crse_bx); - cslope_bx.grow(1); - BL_ASSERT(crse.box() == cslope_bx); - - // alloc temp space for coarse grid slopes - Long cLen = cslope_bx.numPts(); - Real *cslope = new Real[BL_SPACEDIM*cLen]; - Long loslp = cslope_bx.index(crse_bx.smallEnd()); - Long hislp = cslope_bx.index(crse_bx.bigEnd()); - Long cslope_vol = cslope_bx.numPts(); - Long clo = 1 - loslp; - Long chi = clo + cslope_vol - 1; - cLen = hislp - loslp + 1; - - // alloc temp space for one strip of fine grid slopes - int dir; - int fLen = fslope_bx.longside(dir); - Real *fdat = new Real[(BL_SPACEDIM+2)*fLen]; - Real *foff = fdat + 
fLen; - Real *fslope = foff + fLen; - - - // alloc tmp space for slope calc and to allow for vectorization - const int *fblo = fine_box.loVect(); - const int *fbhi = fine_box.hiVect(); - const int *cblo = crse_bx.loVect(); - const int *cbhi = crse_bx.hiVect(); - const int *fslo = fslope_bx.loVect(); - const int *fshi = fslope_bx.hiVect(); - - FORT_CINTERP(fine.dataPtr(0),AMREX_ARLIM(fine.loVect()),AMREX_ARLIM(fine.hiVect()), - fblo,fbhi,fine.nComp(),lrat, - crse.dataPtr(0),clo,chi,cblo,cbhi,fslo,fshi, - cslope,cLen,fslope,fdat,fLen,foff); - - delete [] fdat; - delete [] cslope; -#endif -} - - // --------------------------------------------------------------- void AmrData::PcInterp(FArrayBox &fine, const FArrayBox &crse, const Box &subbox, int lrat) @@ -2026,27 +1905,15 @@ void AmrData::PcInterp(FArrayBox &fine, const FArrayBox &crse, Box fine_ovlp(subbox); fine_ovlp &= cfine; if(fine_ovlp.ok()) { - const int *fblo = fine_ovlp.smallEnd().getVect(); - const int *fbhi = fine_ovlp.bigEnd().getVect(); - Box crse_ovlp(fine_ovlp); - crse_ovlp.coarsen(lrat); - const int *cblo = crse_ovlp.smallEnd().getVect(); - const int *cbhi = crse_ovlp.bigEnd().getVect(); - Box fine_temp(crse_ovlp); - fine_temp.refine(lrat); - int tlo = fine_temp.smallEnd()[0]; - int thi = fine_temp.bigEnd()[0]; - int inextra(0); - if(fine_temp.ixType().test(0) == true) { // node type - inextra = 1; - } - Real *tempSpace = new Real[thi-tlo+1+inextra]; - FORT_PCINTERP(fine.dataPtr(0),AMREX_ARLIM(fine.loVect()),AMREX_ARLIM(fine.hiVect()), - fblo,fbhi, lrat,fine.nComp(), - crse.dataPtr(),AMREX_ARLIM(crse.loVect()),AMREX_ARLIM(crse.hiVect()), - cblo,cbhi, tempSpace,tlo,thi); - - delete [] tempSpace; + auto const& fa = fine.array(); + auto const& ca = crse.const_array(); + amrex::LoopOnCpu(fine_ovlp, fine.nComp(), [&] (int i, int j, int k, int n) + { + int ic = amrex::coarsen(i,lrat); + int jc = amrex::coarsen(j,lrat); + int kc = amrex::coarsen(k,lrat); + fa(i,j,k,n) = ca(ic,jc,kc,n); + }); } } diff --git a/Src/Extern/amrdata/AMReX_FABUTIL_1D.F b/Src/Extern/amrdata/AMReX_FABUTIL_1D.F deleted file mode 100644 index 42824b08963..00000000000 --- a/Src/Extern/amrdata/AMReX_FABUTIL_1D.F +++ /dev/null @@ -1,297 +0,0 @@ -c ::: SCCS stuff "@(#)FABUTIL_2D.F 3.1\t6/25/93" - -#define FORT_CINTERP cinterp1d -#define FORT_PCINTERP pcinterp1d - -#include "AMReX_REAL.H" - -#ifdef BL_USE_FLOAT -#define REAL_T real -#define bigreal 1.0e30 -#define zero 0.0e0 -#define one 1.0e0 -#define half 0.5e0 -#else -#define REAL_T double precision -#define bigreal 1.0d30 -#define zero 0.0d0 -#define one 1.0d0 -#define half 0.5d0 -#endif - -c ::: -------------------------------------------------------------- - subroutine FORT_CINTERP (fine,floi1,floi2,fhii1,fhii2, fblo,fbhi, - $ nvar,lratio, crse,clo,chi,cblo,cbhi,fslo,fshi, cslope,clen, - $ fslope,fdat,flen,voff) - - implicit none - - integer floi1,floi2, fhii1,fhii2 - - integer fblo(2), fbhi(2) - integer cblo(2), cbhi(2) - integer fslo(2), fshi(2) - integer lratio, nvar, clen, flen, clo, chi - REAL_T fine(floi1 :fhii1 ,floi2 :fhii2, nvar) - REAL_T crse(clo:chi, nvar) - REAL_T cslope(clo:chi, 2) - REAL_T fslope(flen, 2) - REAL_T fdat(flen) - REAL_T voff(flen) - -c ::: NOTE: data must be sent in so that -c ::: cslope(1,*) and crse(1,*) are associated with -c ::: the same cell - -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: cinterp: conservative interpolation from coarse grid to -c ::: subregion of fine grid defined by (fblo,fbhi) -c ::: -c ::: Inputs/Outputs -c ::: fine <=> 
(modify) fine grid array -c ::: flo,fhi => (const) index limits of fine grid -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: nvar => (const) number of variables in state vector -c ::: lratio => (const) refinement ratio between levels -c ::: -c ::: crse => (const) coarse grid data widended by 1 zone -c ::: and unrolled -c ::: clo,chi => (const) one dimensional limits of crse grid -c ::: cslo,cshi => (const) coarse grid index limits where -c ::: slopes are to be defined. This is -c ::: the projection of (fblo,fbhi) down -c ::: to the coarse level -c ::: fslo,fshi => (const) fine grid index limits where -c ::: slopes are needed. This is the -c ::: refinement of (cslo,cshi) and -c ::: contains but may not be identical -c ::: to (fblo,fbhi). -c ::: cslope => (modify) temp array coarse grid slopes -c ::: clen => (const) length of coarse gtid slopes -c ::: fslope => (modify) temp array for fine grid slope -c ::: flen => (const) length of fine grid slope array -c ::: fdat => (const) temp array for fine grid data -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -c ::: local var - integer n, fn - integer i, ic, ioff - integer j, jc, joff - integer ist, jst - REAL_T hafrat, volratio - REAL_T cen, forw, back, slp - REAL_T xoff, yoff - integer ncbx, ncby - integer ncsx, ncsy - integer islo, jslo - integer icc, istart, iend - integer lenx, leny, maxlen - - call bl_abort("fix FORT_CINTERP for 1d.") - - hafrat = half*float(lratio-1) - volratio = one/float(lratio) - - ncbx = cbhi(1)-cblo(1)+1 - ncby = cbhi(2)-cblo(2)+1 - ncsx = ncbx+2 - ncsy = ncby+2 - ist = 1 - jst = ncsx - islo = cblo(1)-1 - jslo = cblo(2)-1 - lenx = fbhi(1)-fblo(1)+1 - leny = fbhi(2)-fblo(2)+1 - maxlen = max(lenx,leny) - if (maxlen .eq. lenx) then - do 100 i = fblo(1), fbhi(1) - fn = i-fslo(1)+1 - ioff = mod(fn-1,lratio) - voff(fn) = float(ioff)-hafrat -100 continue - else - do 110 j = fblo(2), fbhi(2) - fn = j-fslo(2)+1 - joff = mod(fn-1,lratio) - voff(fn) = float(joff)-hafrat -110 continue - end if - do 120 n = 1, nvar - -c ::: ::::: compute slopes in x direction - do 130 i = 1, clen - cen = half*(crse(i+ist,n)-crse(i-ist,n)) - forw = crse(i+ist,n)-crse(i,n) - back = crse(i,n)-crse(i-ist,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,1)=merge(slp,zero,forw*back>=0.0d0) -130 continue -c ::: ::::: compute slopes in y direction - do 140 i = 1, clen - cen = half*(crse(i+jst,n)-crse(i-jst,n)) - forw = crse(i+jst,n)-crse(i,n) - back = crse(i,n)-crse(i-jst,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,2)=merge(slp,zero,forw*back>=0.0d0) -140 continue - if (maxlen .eq. lenx) then - do 150 jc = cblo(2), cbhi(2) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 160 ioff = 1, lratio - icc = clo + ist + jst*(jc-jslo) - istart = ioff - iend = ioff + (ncbx-1)*lratio - do 170 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fdat(fn) = crse(icc,n) - icc = icc + ist -170 continue -160 continue - - do 180 joff = 0, lratio-1 - j = lratio*jc + joff - if (j .lt. fblo(2)) then - goto 180 -c --- next --- - end if - if (j .gt. 
fbhi(2)) then - goto 181 -c --- break --- - end if - yoff = float(joff)-hafrat - - do 190 i = fblo(1), fbhi(1) - fn = i-fslo(1)+1 - fine(i,j,n) = fdat(fn) + volratio* (voff(fn)* - $ fslope(fn,1)+yoff*fslope(fn,2)) -190 continue -180 continue -181 continue -150 continue - else - do 200 ic = cblo(1), cbhi(1) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 210 joff = 1, lratio - icc = clo + ist*(ic-islo) + jst - istart = joff - iend = joff + (ncby-1)*lratio - do 220 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fdat(fn) = crse(icc,n) - icc = icc + jst -220 continue -210 continue - - do 230 ioff = 0, lratio-1 - i = lratio*ic + ioff - if (i .lt. fblo(1)) then - goto 230 -c --- next --- - end if - if (i .gt. fbhi(1)) then - goto 231 -c --- break --- - end if - xoff = float(ioff)-hafrat - - do 240 j = fblo(2), fbhi(2) - fn = j-fslo(2)+1 - fine(i,j,n) = fdat(fn) + volratio* (xoff* - $ fslope(fn,1)+voff(fn)*fslope(fn,2)) -240 continue -230 continue -231 continue -200 continue - end if -120 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine FORT_PCINTERP (fine,floi1,fhii1,fblo, fbhi,lrat, - $ nvar, crse,cloi1,chii1,cblo, cbhi,temp,tloi,thii) - - implicit none - - integer floi1 - integer fhii1 - integer cloi1 - integer chii1 - - integer fblo(1), fbhi(1) - integer cblo(1), cbhi(1) - integer lrat, nvar, tloi, thii - REAL_T fine(floi1 :fhii1, nvar) - REAL_T crse(cloi1 :chii1, nvar) - REAL_T temp(tloi:thii + 1) -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: pcinterp: use piecewise constant interpolation to define -c ::: values on the subregion of the fine FAB defined -c ::: by (fblo,fbhi). -c ::: -c ::: Inputs/Outputs -c ::: fine <=> (modify) fab to get interpolated values -c ::: flo,fhi => (const) index limits of fine -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: crse => (const) fab holding coarse grid values -c ::: clo,chi => (const) index limits of src -c ::: cblo,cbhi => (const) subregion of coarse grid holding values -c ::: temp => (modify) temporary space for vectorization -c ::: tlo,thi => (const) index limits of temp space -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: local var - integer i,ic,ioff,n - integer ixproj,ii,ll - ixproj(ii,ll) = (ii + ll*iabs(ii))/ll - iabs(ii) - - do 130 n = 1, nvar - do 140 ioff = 0, lrat-1 - do 150 ic = cblo(1),cbhi(1) - i = lrat*ic + ioff - temp(i) = crse(ic,n) -150 continue -140 continue - do 160 i = fblo(1), fbhi(1) - fine(i,n) = temp(i) -160 continue -130 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine cartgridminmax1d (data, lo1, lo2, hi1, hi2, - $ vfracdata, vfeps, dmin, dmax) - implicit none - - integer lo1, lo2, hi1, hi2 - REAL_T data(lo1:hi1 ,lo2:hi2) - REAL_T vfracdata(lo1:hi1 ,lo2:hi2) - REAL_T vfeps, dmin, dmax - - integer i, j - - call bl_abort("fix cartgridminmax1d for 1d.") - - dmax = -bigreal - dmin = bigreal - do 420 j = lo2, hi2 - do 430 i = lo1, hi1 -c print *, "i j vfracdata(i,j) = ",i,j,vfracdata(i,j) - if ( .not. 
(vfracdata(i,j).lt.vfeps)) then - dmax = max(dmax,data(i,j)) - dmin = min(dmin,data(i,j)) - endif -430 continue -420 continue - - return - end - diff --git a/Src/Extern/amrdata/AMReX_FABUTIL_2D.F b/Src/Extern/amrdata/AMReX_FABUTIL_2D.F deleted file mode 100644 index 8748760ff35..00000000000 --- a/Src/Extern/amrdata/AMReX_FABUTIL_2D.F +++ /dev/null @@ -1,297 +0,0 @@ -c ::: SCCS stuff "@(#)FABUTIL_2D.F 3.1\t6/25/93" - -#define FORT_CINTERP cinterp2d -#define FORT_PCINTERP pcinterp2d - -#include "AMReX_REAL.H" - -#ifdef BL_USE_FLOAT -#define REAL_T real -#define bigreal 1.0e30 -#define zero 0.0e0 -#define one 1.0e0 -#define half 0.5e0 -#else -#define REAL_T double precision -#define bigreal 1.0d30 -#define zero 0.0d0 -#define one 1.0d0 -#define half 0.5d0 -#endif - -c ::: -------------------------------------------------------------- - subroutine FORT_CINTERP (fine,floi1,floi2,fhii1,fhii2, fblo,fbhi, - $ nvar,lratio, crse,clo,chi,cblo,cbhi,fslo,fshi, cslope,clen, - $ fslope,fdat,flen,voff) - - implicit none - - integer floi1,floi2, fhii1,fhii2 - - integer fblo(2), fbhi(2) - integer cblo(2), cbhi(2) - integer fslo(2), fshi(2) - integer lratio, nvar, clen, flen, clo, chi - REAL_T fine(floi1 :fhii1 ,floi2 :fhii2, nvar) - REAL_T crse(clo:chi, nvar) - REAL_T cslope(clo:chi, 2) - REAL_T fslope(flen, 2) - REAL_T fdat(flen) - REAL_T voff(flen) - -c ::: NOTE: data must be sent in so that -c ::: cslope(1,*) and crse(1,*) are associated with -c ::: the same cell - -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: cinterp: conservative interpolation from coarse grid to -c ::: subregion of fine grid defined by (fblo,fbhi) -c ::: -c ::: Inputs/Outputs -c ::: fine <=> (modify) fine grid array -c ::: flo,fhi => (const) index limits of fine grid -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: nvar => (const) number of variables in state vector -c ::: lratio => (const) refinement ratio between levels -c ::: -c ::: crse => (const) coarse grid data widended by 1 zone -c ::: and unrolled -c ::: clo,chi => (const) one dimensional limits of crse grid -c ::: cslo,cshi => (const) coarse grid index limits where -c ::: slopes are to be defined. This is -c ::: the projection of (fblo,fbhi) down -c ::: to the coarse level -c ::: fslo,fshi => (const) fine grid index limits where -c ::: slopes are needed. This is the -c ::: refinement of (cslo,cshi) and -c ::: contains but may not be identical -c ::: to (fblo,fbhi). -c ::: cslope => (modify) temp array coarse grid slopes -c ::: clen => (const) length of coarse gtid slopes -c ::: fslope => (modify) temp array for fine grid slope -c ::: flen => (const) length of fine grid slope array -c ::: fdat => (const) temp array for fine grid data -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -c ::: local var - integer n, fn - integer i, ic, ioff - integer j, jc, joff - integer ist, jst - REAL_T hafrat, volratio - REAL_T cen, forw, back, slp - REAL_T xoff, yoff - integer ncbx, ncby - integer ncsx, ncsy - integer islo, jslo - integer icc, istart, iend - integer lenx, leny, maxlen - - hafrat = half*float(lratio-1) - volratio = one/float(lratio) - - ncbx = cbhi(1)-cblo(1)+1 - ncby = cbhi(2)-cblo(2)+1 - ncsx = ncbx+2 - ncsy = ncby+2 - ist = 1 - jst = ncsx - islo = cblo(1)-1 - jslo = cblo(2)-1 - lenx = fbhi(1)-fblo(1)+1 - leny = fbhi(2)-fblo(2)+1 - maxlen = max(lenx,leny) - if (maxlen .eq. 
lenx) then - do 100 i = fblo(1), fbhi(1) - fn = i-fslo(1)+1 - ioff = mod(fn-1,lratio) - voff(fn) = float(ioff)-hafrat -100 continue - else - do 110 j = fblo(2), fbhi(2) - fn = j-fslo(2)+1 - joff = mod(fn-1,lratio) - voff(fn) = float(joff)-hafrat -110 continue - end if - do 120 n = 1, nvar - -c ::: ::::: compute slopes in x direction - do 130 i = 1, clen - cen = half*(crse(i+ist,n)-crse(i-ist,n)) - forw = crse(i+ist,n)-crse(i,n) - back = crse(i,n)-crse(i-ist,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,1)=merge(slp,zero,forw*back>=0.0d0) -130 continue -c ::: ::::: compute slopes in y direction - do 140 i = 1, clen - cen = half*(crse(i+jst,n)-crse(i-jst,n)) - forw = crse(i+jst,n)-crse(i,n) - back = crse(i,n)-crse(i-jst,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,2)=merge(slp,zero,forw*back>=0.0d0) -140 continue - if (maxlen .eq. lenx) then - do 150 jc = cblo(2), cbhi(2) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 160 ioff = 1, lratio - icc = clo + ist + jst*(jc-jslo) - istart = ioff - iend = ioff + (ncbx-1)*lratio - do 170 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fdat(fn) = crse(icc,n) - icc = icc + ist -170 continue -160 continue - - do 180 joff = 0, lratio-1 - j = lratio*jc + joff - if (j .lt. fblo(2)) then - goto 180 -c --- next --- - end if - if (j .gt. fbhi(2)) then - goto 181 -c --- break --- - end if - yoff = float(joff)-hafrat - - do 190 i = fblo(1), fbhi(1) - fn = i-fslo(1)+1 - fine(i,j,n) = fdat(fn) + volratio* (voff(fn)* - $ fslope(fn,1)+yoff*fslope(fn,2)) -190 continue -180 continue -181 continue -150 continue - else - do 200 ic = cblo(1), cbhi(1) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 210 joff = 1, lratio - icc = clo + ist*(ic-islo) + jst - istart = joff - iend = joff + (ncby-1)*lratio - do 220 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fdat(fn) = crse(icc,n) - icc = icc + jst -220 continue -210 continue - - do 230 ioff = 0, lratio-1 - i = lratio*ic + ioff - if (i .lt. fblo(1)) then - goto 230 -c --- next --- - end if - if (i .gt. fbhi(1)) then - goto 231 -c --- break --- - end if - xoff = float(ioff)-hafrat - - do 240 j = fblo(2), fbhi(2) - fn = j-fslo(2)+1 - fine(i,j,n) = fdat(fn) + volratio* (xoff* - $ fslope(fn,1)+voff(fn)*fslope(fn,2)) -240 continue -230 continue -231 continue -200 continue - end if -120 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine FORT_PCINTERP (fine,floi1,floi2,fhii1,fhii2,fblo, - $ fbhi,lrat,nvar,crse,cloi1,cloi2,chii1,chii2,cblo, - $ cbhi,temp,tloi,thii) - - implicit none - - integer floi1,floi2 - integer fhii1,fhii2 - integer cloi1,cloi2 - integer chii1,chii2 - - integer fblo(2), fbhi(2) - integer cblo(2), cbhi(2) - integer lrat, nvar, tloi, thii - REAL_T fine(floi1 :fhii1 ,floi2 :fhii2, nvar) - REAL_T crse(cloi1 :chii1 ,cloi2 :chii2, nvar) - REAL_T temp(tloi:thii + 1) -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: pcinterp: use piecewise constant interpolation to define -c ::: values on the subregion of the fine FAB defined -c ::: by (fblo,fbhi). 
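
For orientation only, a minimal C++ sketch (hypothetical names, not part of this patch) of the piecewise-constant prolongation that pcinterp describes: each fine cell copies the value of the coarse cell that contains it, with the coarse index obtained by the same floor-style projection as the Fortran statement function ixproj. It assumes lrat > 0 and that crse covers the projected index range.

    #include <cstdlib>

    // Floor-style projection of a fine index onto the coarse grid for a
    // refinement ratio lrat; mirrors the Fortran statement function
    //   ixproj(ii,ll) = (ii + ll*iabs(ii))/ll - iabs(ii)
    inline int coarsen_index (int i, int lrat) {
        return (i + lrat*std::abs(i)) / lrat - std::abs(i);
    }

    // Piecewise-constant interpolation over the fine range [flo, fhi]:
    // fine cell i takes the value of coarse cell coarsen_index(i, lrat).
    inline void pc_interp (double* fine, int flo, int fhi,
                           const double* crse, int clo, int lrat) {
        for (int i = flo; i <= fhi; ++i) {
            fine[i - flo] = crse[coarsen_index(i, lrat) - clo];
        }
    }
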
-c ::: -c ::: Inputs/Outputs -c ::: fine <=> (modify) fab to get interpolated values -c ::: flo,fhi => (const) index limits of fine -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: crse => (const) fab holding coarse grid values -c ::: clo,chi => (const) index limits of src -c ::: cblo,cbhi => (const) subregion of coarse grid holding values -c ::: temp => (modify) temporary space for vectorization -c ::: tlo,thi => (const) index limits of temp space -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: local var - integer i,j,k,ic,jc,kc,ioff,n - integer ixproj,ii,ll - ixproj(ii,ll) = (ii + ll*iabs(ii))/ll - iabs(ii) - - do 120 j = fblo(2), fbhi(2) - jc = ixproj(j,lrat) - do 130 n = 1, nvar - do 140 ioff = 0, lrat-1 - do 150 ic = cblo(1),cbhi(1) - i = lrat*ic + ioff - temp(i) = crse(ic,jc,n) -150 continue -140 continue - do 160 i = fblo(1), fbhi(1) - fine(i,j,n) = temp(i) -160 continue -130 continue -120 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine cartgridminmax2d (data, lo1, lo2, hi1, hi2, - $ vfracdata, vfeps, dmin, dmax) - implicit none - - integer lo1, lo2, hi1, hi2 - REAL_T data(lo1:hi1 ,lo2:hi2) - REAL_T vfracdata(lo1:hi1 ,lo2:hi2) - REAL_T vfeps, dmin, dmax - - integer i, j - - dmax = -bigreal - dmin = bigreal - do 420 j = lo2, hi2 - do 430 i = lo1, hi1 -c print *, "i j vfracdata(i,j) = ",i,j,vfracdata(i,j) - if ( .not. (vfracdata(i,j).lt.vfeps)) then - dmax = max(dmax,data(i,j)) - dmin = min(dmin,data(i,j)) - endif -430 continue -420 continue - - return - end - diff --git a/Src/Extern/amrdata/AMReX_FABUTIL_3D.F b/Src/Extern/amrdata/AMReX_FABUTIL_3D.F deleted file mode 100644 index 8a6ff561577..00000000000 --- a/Src/Extern/amrdata/AMReX_FABUTIL_3D.F +++ /dev/null @@ -1,758 +0,0 @@ -c ::: SCCS stuff "@(#)FABUTIL_3D.F 3.1\t6/25/93" - -#define FORT_CINTERP cinterp3d -#define FORT_PCINTERP pcinterp3d -#define FORT_VCINTERP vcinterp3d -#define FORT_CARTGRIDMINMAX cartgridminmax3d - -#include "AMReX_REAL.H" - -#ifdef BL_USE_FLOAT -#define REAL_T real -#define bigreal 1.0e30 -#define zero 0.0e0 -#define one 1.0e0 -#define half 0.5e0 -#else -#define REAL_T double precision -#define bigreal 1.0d30 -#define zero 0.0d0 -#define one 1.0d0 -#define half 0.5d0 -#endif - -#define IX_PROJ(A,B) (A+B*iabs(A))/B-iabs(A) - - -c ::: -------------------------------------------------------------- - subroutine FORT_CARTGRIDMINMAX (data, dlo1, dlo2, dlo3, dhi1, - $ dhi2, dhi3, vfracdata, vfeps, - $ dmin, dmax) - implicit none - - integer dlo1, dlo2, dlo3, dhi1, dhi2, dhi3 - REAL_T data(dlo1:dhi1 ,dlo2:dhi2, dlo3:dhi3) - REAL_T vfracdata(dlo1:dhi1 ,dlo2:dhi2, dlo3:dhi3) - REAL_T vfeps, dmin, dmax - - integer i, j, k - - dmax = -bigreal - dmin = bigreal - do 410 k = dlo3, dhi3 - do 420 j = dlo2, dhi2 - do 430 i = dlo1, dhi1 - if ( (vfracdata(i,j,k).ge.vfeps)) then - dmax = max(dmax,data(i,j,k)) - dmin = min(dmin,data(i,j,k)) - endif -430 continue -420 continue -410 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine FORT_CINTERP (fine,floi1,floi2,floi3,fhii1,fhii2,fhii3, - $ fblo,fbhi,nvar,lratio, crse,clo,chi,cblo,cbhi,fslo,fshi, cslope, - $ clen,fslope,fdat,flen,voff) - - implicit none - - integer floi1,floi2,floi3, fhii1,fhii2,fhii3 - integer fblo(3), fbhi(3) - integer cblo(3), cbhi(3) - integer fslo(3), fshi(3) - integer lratio, nvar, clen, flen, clo, chi - REAL_T fine(floi1:fhii1,floi2:fhii2,floi3:fhii3,nvar) - REAL_T crse(clo:chi, 
nvar) - REAL_T cslope(clo:chi, 3) - REAL_T fslope(flen, 3) - REAL_T fdat(flen) - REAL_T voff(flen) - -c ::: NOTE: data must be sent in so that -c ::: cslope(1,*) and crse(1,*) are associated with -c ::: the same cell - -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: cinterp: conservative interpolation from coarse grid to -c ::: subregion of fine grid defined by (fblo,fbhi) -c ::: -c ::: Inputs/Outputs -c ::: fine <=> (modify) fine grid array -c ::: flo,fhi => (const) index limits of fine grid -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: nvar => (const) number of variables in state vector -c ::: lratio => (const) refinement ratio between levels -c ::: -c ::: crse => (const) coarse grid data widended by 1 zone -c ::: and unrolled -c ::: clo,chi => (const) one dimensional limits of crse grid -c ::: cslo,cshi => (const) coarse grid index limits where -c ::: slopes are to be defined. This is -c ::: the projection of (fblo,fbhi) down -c ::: to the coarse level -c ::: fslo,fshi => (const) fine grid index limits where -c ::: slopes are needed. This is the -c ::: refinement of (cslo,cshi) and -c ::: contains but may not be identical -c ::: to (fblo,fbhi). -c ::: cslope => (modify) temp array coarse grid slopes -c ::: clen => (const) length of coarse gtid slopes -c ::: fslope => (modify) temp array for fine grid slope -c ::: flen => (const) length of fine grid slope array -c ::: fdat => (const) temp array for fine grid data -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -c ::: local var - integer n, fn - integer i, ic, ioff - integer j, jc, joff - integer k, kc, koff - integer ist, jst, kst - REAL_T hafrat, volratio - REAL_T cen, forw, back, slp - REAL_T xoff, yoff, zoff - integer ncbx, ncby, ncbz - integer ncsx, ncsy, ncsz - integer islo, jslo, kslo - integer icc, istart, iend - integer lenx, leny, lenz, maxlen - - hafrat = half*float(lratio-1) - volratio = one/float(lratio) - - ncbx = cbhi(1)-cblo(1)+1 - ncby = cbhi(2)-cblo(2)+1 - ncbz = cbhi(3)-cblo(3)+1 - ncsx = ncbx+2 - ncsy = ncby+2 - ncsz = ncbz+2 - ist = 1 - jst = ncsx - kst = ncsx*ncsy - islo = cblo(1)-1 - jslo = cblo(2)-1 - kslo = cblo(3)-1 - lenx = fbhi(1)-fblo(1)+1 - leny = fbhi(2)-fblo(2)+1 - lenz = fbhi(3)-fblo(3)+1 - maxlen = max(lenx,leny,lenz) - if (maxlen .eq. lenx) then - do 100 i = fblo(1), fbhi(1) - fn = i-fslo(1)+1 - ioff = mod(fn-1,lratio) - voff(fn) = float(ioff)-hafrat -100 continue - else if (maxlen .eq. 
leny) then - do 110 j = fblo(2), fbhi(2) - fn = j-fslo(2)+1 - joff = mod(fn-1,lratio) - voff(fn) = float(joff)-hafrat -110 continue - else - do 120 k = fblo(3), fbhi(3) - fn = k-fslo(3)+1 - koff = mod(fn-1,lratio) - voff(fn) = float(koff)-hafrat -120 continue - end if - do 130 n = 1, nvar - -c ::: ::::: compute slopes in x direction - do 140 i = 1, clen - cen = half*(crse(i+ist,n)-crse(i-ist,n)) - forw = crse(i+ist,n)-crse(i,n) - back = crse(i,n)-crse(i-ist,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,1)=merge(slp,zero,forw*back>=0.0d0) -140 continue -c ::: ::::: compute slopes in y direction - do 150 i = 1, clen - cen = half*(crse(i+jst,n)-crse(i-jst,n)) - forw = crse(i+jst,n)-crse(i,n) - back = crse(i,n)-crse(i-jst,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,2)=merge(slp,zero,forw*back>=0.0d0) -150 continue -c ::: ::::: compute slopes in z direction - do 160 i = 1, clen - cen = half*(crse(i+kst,n)-crse(i-kst,n)) - forw = crse(i+kst,n)-crse(i,n) - back = crse(i,n)-crse(i-kst,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,3)=merge(slp,zero,forw*back>=0.0d0) -160 continue - - if (maxlen .eq. lenx) then - do 170 kc = cblo(3), cbhi(3) - do 180 jc = cblo(2), cbhi(2) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 190 ioff = 1, lratio - icc = clo + ist + jst*(jc-jslo) + kst*(kc- - $ kslo) - istart = ioff - iend = ioff + (ncbx-1)*lratio - do 200 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fslope(fn,3) = cslope(icc,3) - fdat(fn) = crse(icc,n) - icc = icc + ist -200 continue -190 continue - - do 210 koff = 0, lratio-1 - k = lratio*kc + koff - if (k .lt. fblo(3)) then - goto 210 -c --- next --- - end if - if (k .gt. fbhi(3)) then - goto 211 -c --- break --- - end if - zoff = float(koff)-hafrat - do 220 joff = 0, lratio-1 - j = lratio*jc + joff - if (j .lt. fblo(2)) then - goto 220 -c --- next --- - end if - if (j .gt. fbhi(2)) then - goto 221 -c --- break --- - end if - yoff = float(joff)-hafrat - - do 230 i = fblo(1), fbhi(1) - fn = i-fslo(1)+1 - fine(i,j,k,n) = fdat(fn) + volratio* - $ (voff(fn)*fslope(fn,1)+yoff*fslope(fn, - $ 2)+ zoff*fslope(fn,3)) -230 continue -220 continue -221 continue -210 continue -211 continue -180 continue -170 continue - else if (maxlen .eq. leny) then - do 240 kc = cblo(3), cbhi(3) - do 250 ic = cblo(1), cbhi(1) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 260 joff = 1, lratio - icc = clo + ist*(ic-islo) + jst + kst*(kc- - $ kslo) - istart = joff - iend = joff + (ncby-1)*lratio - do 270 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fslope(fn,3) = cslope(icc,3) - fdat(fn) = crse(icc,n) - icc = icc + jst -270 continue -260 continue - - do 280 koff = 0, lratio-1 - k = lratio*kc + koff - if (k .lt. fblo(3)) then - goto 280 -c --- next --- - end if - if (k .gt. fbhi(3)) then - goto 281 -c --- break --- - end if - zoff = float(koff)-hafrat - do 290 ioff = 0, lratio-1 - i = lratio*ic + ioff - if (i .lt. fblo(1)) then - goto 290 -c --- next --- - end if - if (i .gt. 
fbhi(1)) then - goto 291 -c --- break --- - end if - xoff = float(ioff)-hafrat - - do 300 j = fblo(2), fbhi(2) - fn = j-fslo(2)+1 - fine(i,j,k,n) = fdat(fn) + volratio* - $ (xoff*fslope(fn,1)+voff(fn)*fslope(fn, - $ 2)+ zoff*fslope(fn,3)) -300 continue -290 continue -291 continue -280 continue -281 continue -250 continue -240 continue - else - do 310 jc = cblo(2), cbhi(2) - do 320 ic = cblo(1), cbhi(1) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 330 koff = 1, lratio - icc = clo + ist*(ic-islo) + jst*(jc-jslo) + - $ kst - istart = koff - iend = koff + (ncbz-1)*lratio - do 340 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fslope(fn,3) = cslope(icc,3) - fdat(fn) = crse(icc,n) - icc = icc + kst -340 continue -330 continue - - do 350 joff = 0, lratio-1 - j = lratio*jc + joff - if (j .lt. fblo(2)) then - goto 350 -c --- next --- - end if - if (j .gt. fbhi(2)) then - goto 351 -c --- break --- - end if - yoff = float(joff)-hafrat - do 360 ioff = 0, lratio-1 - i = lratio*ic + ioff - if (i .lt. fblo(1)) then - goto 360 -c --- next --- - end if - if (i .gt. fbhi(1)) then - goto 361 -c --- break --- - end if - xoff = float(ioff)-hafrat - - do 370 k = fblo(3), fbhi(3) - fn = k-fslo(3)+1 - fine(i,j,k,n) = fdat(fn) + volratio* - $ (xoff*fslope(fn,1)+yoff*fslope(fn,2)+ - $ voff(fn)*fslope(fn,3)) -370 continue -360 continue -361 continue -350 continue -351 continue -320 continue -310 continue - end if -130 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine FORT_PCINTERP (fine,floi1,floi2,floi3,fhii1,fhii2, - $ fhii3,fblo,fbhi,lrat,nvar,crse,cloi1,cloi2,cloi3,chii1,chii2, - $ chii3,cblo,cbhi,temp,tloi,thii) - - implicit none - - integer floi1,floi2,floi3 - integer fhii1,fhii2,fhii3 - integer cloi1,cloi2,cloi3 - integer chii1,chii2,chii3 - - integer fblo(3), fbhi(3) - integer cblo(3), cbhi(3) - integer lrat, nvar, tloi, thii - REAL_T fine(floi1:fhii1,floi2:fhii2,floi3:fhii3, nvar) - REAL_T crse(cloi1:chii1,cloi2:chii2,cloi3:chii3, nvar) - REAL_T temp(tloi:thii + 1) -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: pcinterp: use piecewise constant interpolation to define -c ::: values on the subregion of the fine FAB defined -c ::: by (fblo,fbhi). 
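
As a companion note, the limited slopes computed by the cinterp and vcinterp kernels in this file follow a standard monotonized pattern: the central difference is clipped to the smaller of the two one-sided differences and set to zero at local extrema. A minimal C++ sketch (hypothetical name, not part of this patch) of that per-cell computation:

    #include <algorithm>
    #include <cmath>

    // Slope for one cell from its left/center/right neighbors: the central
    // difference, limited by both one-sided differences, and zeroed where
    // the one-sided differences disagree in sign (a local extremum).
    inline double limited_slope (double v_minus, double v_center, double v_plus) {
        double cen  = 0.5 * (v_plus - v_minus);
        double forw = v_plus   - v_center;
        double back = v_center - v_minus;
        double slp  = std::copysign(1.0, cen)
                    * std::min({std::abs(cen), std::abs(forw), std::abs(back)});
        return (forw * back >= 0.0) ? slp : 0.0;
    }
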
-c ::: -c ::: Inputs/Outputs -c ::: fine <=> (modify) fab to get interpolated values -c ::: flo,fhi => (const) index limits of fine -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: crse => (const) fab holding coarse grid values -c ::: clo,chi => (const) index limits of src -c ::: cblo,cbhi => (const) subregion of coarse grid holding values -c ::: temp => (modify) temporary space for vectorization -c ::: tlo,thi => (const) index limits of temp space -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: local var - integer i,j,k,ic,jc,kc,ioff,n - integer ixproj,ii,ll - ixproj(ii,ll) = (ii + ll*iabs(ii))/ll - iabs(ii) - - do 130 k = fblo(3), fbhi(3) - kc = ixproj(k,lrat) - do 140 j = fblo(2), fbhi(2) - jc = ixproj(j,lrat) - do 150 n = 1, nvar - do 160 ioff = 0, lrat-1 - do 170 ic = cblo(1),cbhi(1) - i = lrat*ic + ioff - temp(i) = crse(ic,jc,kc,n) -170 continue -160 continue - do 180 i = fblo(1), fbhi(1) - fine(i,j,k,n) = temp(i) -180 continue -150 continue -140 continue -130 continue - - return - end - - -c ::: -------------------------------------------------------------- - subroutine FORT_VCINTERP (fine,floi1,floi2,floi3,fhii1,fhii2, - $ fhii3,fbloi1,fbloi2,fbloi3, fbhii1,fbhii2,fbhii3,nvar,lratio, - $ crse,clo,chi,cblo,cbhi, fslo,fshi,cvloi1,cvloi2,cvloi3,cvhii1, - $ cvhii2,cvhii3, cslope,clen,fslope,fdat,flen,voff, cvc1,fvc1,cvc2, - $ fvc2,cvc3,fvc3) - - implicit none - - integer floi1,floi2,floi3 - integer fhii1,fhii2,fhii3 - integer fbloi1,fbloi2,fbloi3 - integer fbhii1,fbhii2,fbhii3 - integer cvloi1,cvloi2,cvloi3 - integer cvhii1,cvhii2,cvhii3 - - integer cblo(3), cbhi(3) - integer fslo(3), fshi(3) - integer lratio, nvar, clen, flen, clo, chi - REAL_T fine(floi1:fhii1,floi2:fhii2,floi3:fhii3,nvar) - REAL_T crse(clo:chi, nvar) - REAL_T cslope(clo:chi, 3) - REAL_T fslope(flen, 3) - REAL_T fdat(flen) - REAL_T voff(flen) - - REAL_T cvc1(cvloi1 :cvhii1+1) - REAL_T cvc2(cvloi2 :cvhii2+1) - REAL_T cvc3(cvloi3 :cvhii3+1) - REAL_T fvc1(fbloi1 :fbhii1+1) - REAL_T fvc2(fbloi2 :fbhii2+1) - REAL_T fvc3(fbloi3 :fbhii3+1) - -c ::: NOTE: data must be sent in so that -c ::: cslope(1,*) and crse(1,*) are associated with -c ::: the same cell - -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -c ::: vcinterp: conservative interpolation in volume coordinates -c ::: from coarse grid to -c ::: subregion of fine grid defined by (fblo,fbhi) -c ::: -c ::: Inputs/Outputs -c ::: fine <=> (modify) fine grid array -c ::: flo,fhi => (const) index limits of fine grid -c ::: fblo,fbhi => (const) subregion of fine grid to get values -c ::: nvar => (const) number of variables in state vector -c ::: lratio => (const) refinement ratio between levels -c ::: -c ::: crse => (const) coarse grid data widended by 1 zone -c ::: and unrolled -c ::: clo,chi => (const) one dimensional limits of crse grid -c ::: cslo,cshi => (const) coarse grid index limits where -c ::: slopes are to be defined. This is -c ::: the projection of (fblo,fbhi) down -c ::: to the coarse level -c ::: fslo,fshi => (const) fine grid index limits where -c ::: slopes are needed. This is the -c ::: refinement of (cslo,cshi) and -c ::: contains but may not be identical -c ::: to (fblo,fbhi). 
-c ::: cslope => (modify) temp array coarse grid slopes -c ::: clen => (const) length of coarse gtid slopes -c ::: fslope => (modify) temp array for fine grid slope -c ::: flen => (const) length of fine grid slope array -c ::: fdat => (const) temp array for fine grid data -c ::: cvlo,cvhi => (const) coarse grid index limits where -c ::: volume coordinates are defined -c ::: cvc1 => (const) coarse grid volume coords, x-dir -c ::: cvc2 => (const) coarse grid volume coords, y_dir -c ::: cvc3 => (const) coarse grid volume coords, z-dir -c ::: fvc1 => (const) fine grid volume coords, x-dir -c ::: fvc2 => (const) fine grid volume coords, y-dir -c ::: fvc3 => (const) fine grid volume coords, z-dir -c ::: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -c ::: local var - integer n, fn - integer i, ic, ioff - integer j, jc, joff - integer k, kc, koff - integer ist, jst, kst - REAL_T cen, forw, back, slp - REAL_T xoff, yoff, zoff - REAL_T fcen, ccen - integer ncbx, ncby, ncbz - integer ncsx, ncsy, ncsz - integer islo, jslo, kslo - integer icc, istart, iend - integer lenx, leny, lenz, maxlen - - ncbx = cbhi(1)-cblo(1)+1 - ncby = cbhi(2)-cblo(2)+1 - ncbz = cbhi(3)-cblo(3)+1 - ncsx = ncbx+2 - ncsy = ncby+2 - ncsz = ncbz+2 - ist = 1 - jst = ncsx - kst = ncsx*ncsy - islo = cblo(1)-1 - jslo = cblo(2)-1 - kslo = cblo(3)-1 - lenx = fbhii1-fbloi1+1 - leny = fbhii2-fbloi2+1 - lenz = fbhii3-fbloi3+1 - maxlen = max(lenx,leny,lenz) - if (maxlen .eq. lenx) then - do 380 i = fbloi1, fbhii1 - fn = i-fslo(1)+1 - ic = IX_PROJ(i,lratio) - fcen = half*(fvc1(i)+fvc1(i+1)) - ccen = half*(cvc1(ic)+cvc1(ic+1)) - voff(fn) = (fcen-ccen)/(cvc1(ic+1)-cvc1(ic)) -380 continue - else if (maxlen .eq. leny) then - do 390 j = fbloi2, fbhii2 - fn = j-fslo(2)+1 - jc = IX_PROJ(j,lratio) - fcen = half*(fvc2(j)+fvc2(j+1)) - ccen = half*(cvc2(jc)+cvc2(jc+1)) - voff(fn) = (fcen-ccen)/(cvc2(jc+1)-cvc2(jc)) -390 continue - else - do 400 k = fbloi3, fbhii3 - fn = k-fslo(3)+1 - kc = IX_PROJ(k,lratio) - fcen = half*(fvc3(k)+fvc3(k+1)) - ccen = half*(cvc3(kc)+cvc3(kc+1)) - voff(fn) = (fcen-ccen)/(cvc3(kc+1)-cvc3(kc)) -400 continue - end if - do 410 n = 1, nvar - -c ::: ::::: compute slopes in x direction - do 420 i = 1, clen - cen = half*(crse(i+ist,n)-crse(i-ist,n)) - forw = crse(i+ist,n)-crse(i,n) - back = crse(i,n)-crse(i-ist,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,1)=merge(slp,zero,forw*back>=0.0d0) -420 continue -c ::: ::::: compute slopes in y direction - do 430 i = 1, clen - cen = half*(crse(i+jst,n)-crse(i-jst,n)) - forw = crse(i+jst,n)-crse(i,n) - back = crse(i,n)-crse(i-jst,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,2)=merge(slp,zero,forw*back>=0.0d0) -430 continue -c ::: ::::: compute slopes in z direction - do 440 i = 1, clen - cen = half*(crse(i+kst,n)-crse(i-kst,n)) - forw = crse(i+kst,n)-crse(i,n) - back = crse(i,n)-crse(i-kst,n) - slp = sign(one,cen)*min(abs(cen),abs(forw),abs(back)) - cslope(i,3)=merge(slp,zero,forw*back>=0.0d0) -440 continue - - if (maxlen .eq. 
lenx) then - do 450 kc = cblo(3), cbhi(3) - do 460 jc = cblo(2), cbhi(2) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 470 ioff = 1, lratio - icc = clo + ist + jst*(jc-jslo) + kst*(kc- - $ kslo) - istart = ioff - iend = ioff + (ncbx-1)*lratio - do 480 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fslope(fn,3) = cslope(icc,3) - fdat(fn) = crse(icc,n) - icc = icc + ist -480 continue -470 continue - - do 490 koff = 0, lratio-1 - k = lratio*kc + koff - if (k .lt. fbloi3) then - goto 490 -c --- next --- - end if - if (k .gt. fbhii3) then - goto 491 -c --- break --- - end if - fcen = half*(fvc3(k)+fvc3(k+1)) - ccen = half*(cvc3(kc)+cvc3(kc+1)) - zoff = (fcen-ccen)/(cvc3(kc+1)-cvc3(kc)) - do 500 joff = 0, lratio-1 - j = lratio*jc + joff - if (j .lt. fbloi2) then - goto 500 -c --- next --- - end if - if (j .gt. fbhii2) then - goto 501 -c --- break --- - end if - fcen = half*(fvc2(j)+fvc2(j+1)) - ccen = half*(cvc2(jc)+cvc2(jc+1)) - yoff = (fcen-ccen)/(cvc2(jc+1)-cvc2(jc)) - - do 510 i = fbloi1, fbhii1 - fn = i-fslo(1)+1 - fine(i,j,k,n) = fdat(fn) + voff(fn)* - $ fslope(fn,1)+yoff*fslope(fn,2)+ zoff* - $ fslope(fn,3) -510 continue -500 continue -501 continue -490 continue -491 continue -460 continue -450 continue - else if (maxlen .eq. leny) then - do 520 kc = cblo(3), cbhi(3) - do 530 ic = cblo(1), cbhi(1) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 540 joff = 1, lratio - icc = clo + ist*(ic-islo) + jst + kst*(kc- - $ kslo) - istart = joff - iend = joff + (ncby-1)*lratio - do 550 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fslope(fn,3) = cslope(icc,3) - fdat(fn) = crse(icc,n) - icc = icc + jst -550 continue -540 continue - - do 560 koff = 0, lratio-1 - k = lratio*kc + koff - if (k .lt. fbloi3) then - goto 560 -c --- next --- - end if - if (k .gt. fbhii3) then - goto 561 -c --- break --- - end if - fcen = half*(fvc3(k)+fvc3(k+1)) - ccen = half*(cvc3(kc)+cvc3(kc+1)) - zoff = (fcen-ccen)/(cvc3(kc+1)-cvc3(kc)) - do 570 ioff = 0, lratio-1 - i = lratio*ic + ioff - if (i .lt. fbloi1) then - goto 570 -c --- next --- - end if - if (i .gt. fbhii1) then - goto 571 -c --- break --- - end if - fcen = half*(fvc1(i)+fvc1(i+1)) - ccen = half*(cvc1(ic)+cvc1(ic+1)) - xoff = (fcen-ccen)/(cvc1(ic+1)-cvc1(ic)) - - do 580 j = fbloi2, fbhii2 - fn = j-fslo(2)+1 - fine(i,j,k,n) = fdat(fn) + xoff* - $ fslope(fn,1)+voff(fn)*fslope(fn,2)+ - $ zoff*fslope(fn,3) -580 continue -570 continue -571 continue -560 continue -561 continue -530 continue -520 continue - else - do 590 jc = cblo(2), cbhi(2) - do 600 ic = cblo(1), cbhi(1) - -c ::: ..,.......::::: strip out a fine grid slope vector - do 610 koff = 1, lratio - icc = clo + ist*(ic-islo) + jst*(jc-jslo) + - $ kst - istart = koff - iend = koff + (ncbz-1)*lratio - do 620 fn = istart, iend, lratio - fslope(fn,1) = cslope(icc,1) - fslope(fn,2) = cslope(icc,2) - fslope(fn,3) = cslope(icc,3) - fdat(fn) = crse(icc,n) - icc = icc + kst -620 continue -610 continue - - do 630 joff = 0, lratio-1 - j = lratio*jc + joff - if (j .lt. fbloi2) then - goto 630 -c --- next --- - end if - if (j .gt. fbhii2) then - goto 631 -c --- break --- - end if - fcen = half*(fvc2(j)+fvc2(j+1)) - ccen = half*(cvc2(jc)+cvc2(jc+1)) - yoff = (fcen-ccen)/(cvc2(jc+1)-cvc2(jc)) - do 640 ioff = 0, lratio-1 - i = lratio*ic + ioff - if (i .lt. fbloi1) then - goto 640 -c --- next --- - end if - if (i .gt. 
fbhii1) then - goto 641 -c --- break --- - end if - fcen = half*(fvc1(i)+fvc1(i+1)) - ccen = half*(cvc1(ic)+cvc1(ic+1)) - xoff = (fcen-ccen)/(cvc1(ic+1)-cvc1(ic)) - - do 650 k = fbloi3, fbhii3 - fn = k-fslo(3)+1 - fine(i,j,k,n) = fdat(fn) + xoff* - $ fslope(fn,1)+yoff*fslope(fn,2)+ - $ voff(fn)*fslope(fn,3) -650 continue -640 continue -641 continue -630 continue -631 continue -600 continue -590 continue - end if -410 continue - - return - end - - diff --git a/Src/Extern/amrdata/CMakeLists.txt b/Src/Extern/amrdata/CMakeLists.txt index 10761f6a641..d17fcddec19 100644 --- a/Src/Extern/amrdata/CMakeLists.txt +++ b/Src/Extern/amrdata/CMakeLists.txt @@ -12,7 +12,6 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_WritePlotFile.H AMReX_WritePlotFile.cpp AMReX_AmrvisConstants.H - AMReX_FABUTIL_${D}D.F ) if (AMReX_PROFPARSER) diff --git a/Src/Extern/amrdata/Make.package b/Src/Extern/amrdata/Make.package index 3f3769ad174..9368e0669b3 100644 --- a/Src/Extern/amrdata/Make.package +++ b/Src/Extern/amrdata/Make.package @@ -1,6 +1,5 @@ CEXE_sources += AMReX_AmrData.cpp AMReX_XYPlotDataList.cpp AMReX_DataServices.cpp AMReX_WritePlotFile.cpp CEXE_headers += AMReX_AmrData.H AMReX_AmrvisConstants.H AMReX_XYPlotDataList.H AMReX_DataServices.H AMReX_WritePlotFile.H -FEXE_sources += AMReX_FABUTIL_${DIM}D.F VPATH_LOCATIONS += $(AMREX_HOME)/Src/Extern/amrdata INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Extern/amrdata diff --git a/Src/F_Interfaces/Base/AMReX_boxarray_fi.cpp b/Src/F_Interfaces/Base/AMReX_boxarray_fi.cpp index 248ae076b6f..dd7916a9adf 100644 --- a/Src/F_Interfaces/Base/AMReX_boxarray_fi.cpp +++ b/Src/F_Interfaces/Base/AMReX_boxarray_fi.cpp @@ -47,6 +47,11 @@ extern "C" { ba->maxSize(iv); } + Long amrex_fi_boxarray_nboxes (const BoxArray* ba) + { + return ba->size(); + } + void amrex_fi_boxarray_get_box (const BoxArray* ba, int i, int lo[3], int hi[3]) { const Box& bx = (*ba)[i]; diff --git a/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 b/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 index 902f2925425..b156a8ae996 100644 --- a/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 @@ -20,6 +20,7 @@ module amrex_boxarray_module procedure :: move => amrex_boxarray_move ! transfer ownership generic :: maxSize => amrex_boxarray_maxsize_int, & ! make the boxes smaller & amrex_boxarray_maxsize_int3, amrex_boxarray_maxsize_iv + procedure :: nboxes => amrex_boxarray_nboxes procedure :: get_box => amrex_boxarray_get_box procedure :: nodal_type => amrex_boxarray_nodal_type ! 
get index type procedure :: num_pts => amrex_boxarray_num_pts @@ -82,6 +83,13 @@ subroutine amrex_fi_boxarray_maxsize (ba,s) bind(c) integer(c_int), intent(in) :: s(3) end subroutine amrex_fi_boxarray_maxsize + pure function amrex_fi_boxarray_nboxes (ba) bind(c) + import + implicit none + type(c_ptr), value, intent(in) :: ba + integer(amrex_long) :: amrex_fi_boxarray_nboxes + end function amrex_fi_boxarray_nboxes + subroutine amrex_fi_boxarray_get_box (ba,i,lo,hi) bind(c) import implicit none @@ -194,6 +202,16 @@ subroutine amrex_boxarray_maxsize_iv (this, s) call amrex_fi_boxarray_maxsize(this%p, s) end subroutine amrex_boxarray_maxsize_iv + pure function amrex_boxarray_nboxes (this) result(n) + class(amrex_boxarray), intent(in) :: this + integer(amrex_long) :: n + if (c_associated(this%p)) then + n = amrex_fi_boxarray_nboxes(this%p) + else + n = 0 + end if + end function amrex_boxarray_nboxes + function amrex_boxarray_get_box (this, i) result(bx) class(amrex_boxarray) :: this integer, intent(in) :: i @@ -220,7 +238,11 @@ end function amrex_boxarray_nodal_type pure function amrex_boxarray_num_pts (this) result(n) class(amrex_boxarray), intent(in) :: this integer(amrex_long) :: n - n = amrex_fi_boxarray_numpts(this%p) + if (c_associated(this%p)) then + n = amrex_fi_boxarray_numpts(this%p) + else + n = 0 + end if end function amrex_boxarray_num_pts pure function amrex_boxarray_intersects_box (this, bx) result(r) diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index cae0b2028f0..6287ef4b422 100644 --- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -21,6 +21,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) MLMG/AMReX_MLCellABecLap_K.H MLMG/AMReX_MLCellABecLap_${D}D_K.H MLMG/AMReX_MLCGSolver.H + MLMG/AMReX_PCGSolver.H MLMG/AMReX_MLABecLaplacian.H MLMG/AMReX_MLABecLap_K.H MLMG/AMReX_MLABecLap_${D}D_K.H diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_1D_K.H index fbf324d6c98..29f8fd9f7e9 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_1D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_1D_K.H @@ -157,6 +157,69 @@ void abec_gsrb_os (int i, int, int, int n, Array4 const& phi, Array4 template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_jacobi (int i, int, int, int n, Array4 const& phi, + Array4 const& rhs, Array4 const& Ax, + T alpha, Array4 const& a, + T dhx, + Array4 const& bX, + Array4 const& m0, + Array4 const& m1, + Array4 const& f0, + Array4 const& f1, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0) + ? f0(vlo.x,0,0,n) : T(0.0); + T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0) + ? f1(vhi.x,0,0,n) : T(0.0); + + T delta = dhx*(bX(i,0,0,n)*cf0 + bX(i+1,0,0,n)*cf1); + + T gamma = alpha*a(i,0,0) + + dhx*( bX(i,0,0,n) + bX(i+1,0,0,n) ); + + phi(i,0,0,n) += T(2.0/3.0) * (rhs(i,0,0,n) - Ax(i,0,0,n)) / (gamma - delta); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_jacobi_os (int i, int, int, int n, Array4 const& phi, + Array4 const& rhs, Array4 const& Ax, + T alpha, Array4 const& a, + T dhx, + Array4 const& bX, + Array4 const& m0, + Array4 const& m1, + Array4 const& f0, + Array4 const& f1, + Array4 const& osm, + Box const& vbox) noexcept +{ + if (osm(i,0,0) == 0) { + phi(i,0,0) = T(0.0); + } else { + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0) + ? 
f0(vlo.x,0,0,n) : T(0.0); + T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0) + ? f1(vhi.x,0,0,n) : T(0.0); + + T delta = dhx*(bX(i,0,0,n)*cf0 + bX(i+1,0,0,n)*cf1); + + T gamma = alpha*a(i,0,0) + + dhx*( bX(i,0,0,n) + bX(i+1,0,0,n) ); + + phi(i,0,0,n) += T(2.0/3.0) * (rhs(i,0,0,n) - Ax(i,0,0,n)) / (gamma - delta); + } +} + +template +AMREX_FORCE_INLINE void abec_gsrb_with_line_solve ( Box const& /*box*/, Array4 const& /*phi*/, Array4 const& /*rhs*/, T /*alpha*/, Array4 const& /*a*/, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_2D_K.H index 9184a755151..2beecd9d422 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_2D_K.H @@ -230,6 +230,81 @@ void abec_gsrb_os (int i, int j, int, int n, Array4 const& phi, Array4 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_jacobi (int i, int j, int, int n, Array4 const& phi, + Array4 const& rhs, Array4< T const> const& Ax, + T alpha, Array4 const& a, + T dhx, T dhy, + Array4 const& bX, Array4 const& bY, + Array4 const& m0, Array4 const& m2, + Array4 const& m1, Array4 const& m3, + Array4 const& f0, Array4 const& f2, + Array4 const& f1, Array4 const& f3, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0) + ? f0(vlo.x,j,0,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0) + ? f1(i,vlo.y,0,n) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0) + ? f2(vhi.x,j,0,n) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0) + ? f3(i,vhi.y,0,n) : T(0.0); + + T delta = dhx*(bX(i,j,0,n)*cf0 + bX(i+1,j,0,n)*cf2) + + dhy*(bY(i,j,0,n)*cf1 + bY(i,j+1,0,n)*cf3); + + T gamma = alpha*a(i,j,0) + + dhx*( bX(i,j,0,n) + bX(i+1,j,0,n) ) + + dhy*( bY(i,j,0,n) + bY(i,j+1,0,n) ); + + phi(i,j,0,n) += T(2.0/3.0) * (rhs(i,j,0,n) - Ax(i,j,0,n)) / (gamma - delta); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_jacobi_os (int i, int j, int, int n, Array4 const& phi, + Array4 const& rhs, Array4 const& Ax, + T alpha, Array4 const& a, + T dhx, T dhy, + Array4 const& bX, Array4 const& bY, + Array4 const& m0, Array4 const& m2, + Array4 const& m1, Array4 const& m3, + Array4 const& f0, Array4 const& f2, + Array4 const& f1, Array4 const& f3, + Array4 const& osm, + Box const& vbox) noexcept +{ + if (osm(i,j,0) == 0) { + phi(i,j,0,n) = T(0.0); + } else { + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0) + ? f0(vlo.x,j,0,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0) + ? f1(i,vlo.y,0,n) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0) + ? f2(vhi.x,j,0,n) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0) + ? 
f3(i,vhi.y,0,n) : T(0.0); + + T delta = dhx*(bX(i,j,0,n)*cf0 + bX(i+1,j,0,n)*cf2) + + dhy*(bY(i,j,0,n)*cf1 + bY(i,j+1,0,n)*cf3); + + T gamma = alpha*a(i,j,0) + + dhx*( bX(i,j,0,n) + bX(i+1,j,0,n) ) + + dhy*( bY(i,j,0,n) + bY(i,j+1,0,n) ); + + phi(i,j,0,n) += T(2.0/3.0) * (rhs(i,j,0,n) - Ax(i,j,0,n)) / (gamma - delta); + } +} + +template +AMREX_FORCE_INLINE void abec_gsrb_with_line_solve ( Box const& box, Array4 const& phi, Array4 const& rhs, T alpha, Array4 const& a, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H index 7d6cca59b49..bb5172396cf 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H @@ -329,6 +329,106 @@ void abec_gsrb_os (int i, int j, int k, int n, template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_jacobi (int i, int j, int k, int n, Array4 const& phi, + Array4 const& rhs, Array4 const& Ax, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& m0, Array4 const& m2, + Array4 const& m4, + Array4 const& m1, Array4 const& m3, + Array4 const& m5, + Array4 const& f0, Array4 const& f2, + Array4 const& f4, + Array4 const& f1, Array4 const& f3, + Array4 const& f5, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z,n) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k,n) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k,n) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z,n) : T(0.0); + + T gamma = alpha*a(i,j,k) + + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n)) + + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n)) + + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n)); + + T g_m_d = gamma + - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5)); + + phi(i,j,k,n) += T(2.0/3.0) * (rhs(i,j,k,n) - Ax(i,j,k,n)) / g_m_d; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void abec_jacobi_os (int i, int j, int k, int n, + Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, + T alpha, Array4 const& a, + T dhx, T dhy, T dhz, + Array4 const& bX, Array4 const& bY, + Array4 const& bZ, + Array4 const& m0, Array4 const& m2, + Array4 const& m4, + Array4 const& m1, Array4 const& m3, + Array4 const& m5, + Array4 const& f0, Array4 const& f2, + Array4 const& f4, + Array4 const& f1, Array4 const& f3, + Array4 const& f5, + Array4 const& osm, + Box const& vbox) noexcept +{ + if (osm(i,j,k) == 0) { + phi(i,j,k,n) = T(0.0); + } else { + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k,n) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k,n) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z,n) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k,n) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k,n) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? 
f5(i,j,vhi.z,n) : T(0.0); + + T gamma = alpha*a(i,j,k) + + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n)) + + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n)) + + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n)); + + T g_m_d = gamma + - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3) + + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4) + + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5)); + + phi(i,j,k,n) += T(2.0/3.0) * (rhs(i,j,k,n) - Ax(i,j,k,n)) / g_m_d; + } +} + +template +AMREX_FORCE_INLINE void tridiagonal_solve (Array1D& a_ls, Array1D& b_ls, Array1D& c_ls, Array1D& r_ls, Array1D& u_ls, Array1D& gam, int ilen ) noexcept @@ -348,7 +448,7 @@ void tridiagonal_solve (Array1D& a_ls, Array1D& b_ls, Array1D -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +AMREX_FORCE_INLINE void abec_gsrb_with_line_solve ( Box const& box, Array4 const& phi, Array4 const& rhs, T alpha, Array4 const& a, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H index 0f90a6ead17..9b56b8049df 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H @@ -864,6 +864,12 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in regular_coarsening = this->mg_coarsen_ratio_vec[mglev-1] == this->mg_coarsen_ratio; } + MF Ax; + if (! this->m_use_gauss_seidel && regular_coarsening) { // jacobi + Ax.define(sol.boxArray(), sol.DistributionMap(), sol.nComp(), 0); + Fapply(amrlev, mglev, Ax, sol); + } + const MF& acoef = m_a_coeffs[amrlev][mglev]; AMREX_ALWAYS_ASSERT(acoef.nGrowVect() == 0); AMREX_D_TERM(const MF& bxcoef = m_b_coeffs[amrlev][mglev][0];, @@ -939,40 +945,76 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in if (this->m_overset_mask[amrlev][mglev]) { const auto& osmma = this->m_overset_mask[amrlev][mglev]->const_arrays(); - ParallelFor(sol, IntVect(0), nc, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept - { - Box vbx(ama[box_no]); - abec_gsrb_os(i,j,k,n, solnma[box_no], rhsma[box_no], alpha, ama[box_no], - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), - AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), - AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), - AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), - AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), - osmma[box_no], vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + ParallelFor(sol, IntVect(0), nc, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + Box vbx(ama[box_no]); + abec_gsrb_os(i,j,k,n, solnma[box_no], rhsma[box_no], alpha, ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), + AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), + AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), + AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), + osmma[box_no], vbx, redblack); + }); + } else { + const auto& axma = Ax.const_arrays(); + ParallelFor(sol, IntVect(0), nc, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + Box vbx(ama[box_no]); + abec_jacobi_os(i,j,k,n, solnma[box_no], rhsma[box_no], axma[box_no], + alpha, ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), + AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), + AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), + 
AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), + osmma[box_no], vbx); + }); + } } else if (regular_coarsening) { - ParallelFor(sol, IntVect(0), nc, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept - { - Box vbx(ama[box_no]); - abec_gsrb(i,j,k,n, solnma[box_no], rhsma[box_no], alpha, ama[box_no], - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), - AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), - AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), - AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), - AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + ParallelFor(sol, IntVect(0), nc, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + Box vbx(ama[box_no]); + abec_gsrb(i,j,k,n, solnma[box_no], rhsma[box_no], alpha, ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), + AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), + AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), + AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), + vbx, redblack); + }); + } else { + const auto& axma = Ax.const_arrays(); + ParallelFor(sol, IntVect(0), nc, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + Box vbx(ama[box_no]); + abec_jacobi(i,j,k,n, solnma[box_no], rhsma[box_no], axma[box_no], + alpha, ama[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]), + AMREX_D_DECL(m0ma[box_no],m2ma[box_no],m4ma[box_no]), + AMREX_D_DECL(m1ma[box_no],m3ma[box_no],m5ma[box_no]), + AMREX_D_DECL(f0ma[box_no],f2ma[box_no],f4ma[box_no]), + AMREX_D_DECL(f1ma[box_no],f3ma[box_no],f5ma[box_no]), + vbx); + }); + } } Gpu::streamSynchronize(); } else #endif { MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) { mfi_info.EnableTiling().SetDynamic(true); } + mfi_info.EnableTiling().SetDynamic(true); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) @@ -1013,43 +1055,71 @@ MLABecLaplacianT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in if (this->m_overset_mask[amrlev][mglev]) { const auto& osm = this->m_overset_mask[amrlev][mglev]->const_array(mfi); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D(tbx, nc, i, j, k, n, - { - abec_gsrb_os(i,j,k,n, solnfab, rhsfab, alpha, afab, - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_D_DECL(bxfab, byfab, bzfab), - AMREX_D_DECL(m0,m2,m4), - AMREX_D_DECL(m1,m3,m5), - AMREX_D_DECL(f0fab,f2fab,f4fab), - AMREX_D_DECL(f1fab,f3fab,f5fab), - osm, vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_LOOP_4D(tbx, nc, i, j, k, n, + { + abec_gsrb_os(i,j,k,n, solnfab, rhsfab, alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxfab, byfab, bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + osm, vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_LOOP_4D(tbx, nc, i, j, k, n, + { + abec_jacobi_os(i,j,k,n, solnfab, rhsfab, axfab, + alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxfab, byfab, bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + osm, vbx); + }); + } } else if (regular_coarsening) { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D(tbx, nc, i, j, k, n, - { - abec_gsrb(i,j,k,n, solnfab, rhsfab, alpha, afab, - AMREX_D_DECL(dhx, dhy, dhz), - 
AMREX_D_DECL(bxfab, byfab, bzfab), - AMREX_D_DECL(m0,m2,m4), - AMREX_D_DECL(m1,m3,m5), - AMREX_D_DECL(f0fab,f2fab,f4fab), - AMREX_D_DECL(f1fab,f3fab,f5fab), - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_LOOP_4D(tbx, nc, i, j, k, n, + { + abec_gsrb(i,j,k,n, solnfab, rhsfab, alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxfab, byfab, bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_LOOP_4D(tbx, nc, i, j, k, n, + { + abec_jacobi(i,j,k,n, solnfab, rhsfab, axfab, + alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxfab, byfab, bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + vbx); + }); + } } else { - Gpu::LaunchSafeGuard lsg(false); // xxxxx gpu todo // line solve does not with with GPU - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, thread_box, - { - abec_gsrb_with_line_solve(thread_box, solnfab, rhsfab, alpha, afab, - AMREX_D_DECL(dhx, dhy, dhz), - AMREX_D_DECL(bxfab, byfab, bzfab), - AMREX_D_DECL(m0,m2,m4), - AMREX_D_DECL(m1,m3,m5), - AMREX_D_DECL(f0fab,f2fab,f4fab), - AMREX_D_DECL(f1fab,f3fab,f5fab), - vbx, redblack, nc); - }); + abec_gsrb_with_line_solve(tbx, solnfab, rhsfab, alpha, afab, + AMREX_D_DECL(dhx, dhy, dhz), + AMREX_D_DECL(bxfab, byfab, bzfab), + AMREX_D_DECL(m0,m2,m4), + AMREX_D_DECL(m1,m3,m5), + AMREX_D_DECL(f0fab,f2fab,f4fab), + AMREX_D_DECL(f1fab,f3fab,f5fab), + vbx, redblack, nc); } } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index db8a10f2080..b613a4f3a83 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -396,6 +396,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( !initial_vec_zeroed ) { LocalAdd(sol, sorig, 0, 0, ncomp, nghost); } + if (ret == 8) { ret = 9; } } else { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H index 02f7adeb115..970cf48fc20 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H @@ -562,8 +562,8 @@ MLCellABecLapT::addInhomogNeumannFlux ( if (! domain.contains(ccb)) { for (int icomp = 0; icomp < ncomp; ++icomp) { auto const& phi = sol.const_array(mfi,icomp); - auto const bv = bndry.bndryValues(ori).multiFab().const_array(mfi,icomp); - auto const bc = bcoef[idim] ? bcoef[idim]->const_array(mfi,icomp) + auto const& bv = bndry.bndryValues(ori).multiFab().const_array(mfi,icomp); + auto const& bc = bcoef[idim] ? 
bcoef[idim]->const_array(mfi,icomp) : Array4{}; auto const& f = grad[idim]->array(mfi,icomp); if (ori.isLow()) { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index 536d4c82b04..e04e16f8bd6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -55,6 +55,8 @@ public: } void update () override; + void setGaussSeidel (bool flag) noexcept { m_use_gauss_seidel = flag; } + virtual bool isCrossStencil () const { return true; } virtual bool isTensorOp () const { return false; } @@ -86,6 +88,8 @@ public: void solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, const MF* crse_bcdata=nullptr) override; + void prepareForFluxes (int amrlev, const MF* crse_bcdata = nullptr) override; + void correctionResidual (int amrlev, int mglev, MF& resid, MF& x, const MF& b, BCMode bc_mode, const MF* crse_bcdata=nullptr) final; @@ -133,6 +137,10 @@ public: Vector > m_robin_bcval; +#ifdef AMREX_USE_HYPRE + void setInterpBndryHalfWidth (int w) { m_interpbndry_halfwidth = w; } +#endif + protected: bool m_has_metric_term = false; @@ -195,6 +203,8 @@ protected: mutable Vector> m_fluxreg; + bool m_use_gauss_seidel = true; // use red-black Gauss-Seidel by default + private: void defineAuxData (); @@ -202,6 +212,8 @@ private: void computeVolInv () const; mutable Vector > m_volinv; // used by solvability fix + + int m_interpbndry_halfwidth = 2; }; template @@ -472,7 +484,9 @@ MLCellLinOpT::defineBC () bc_data.setVal(0.0); m_bndry_cor[amrlev]->setBndryValues(*m_crse_cor_br[amrlev], 0, bc_data, 0, 0, ncomp, - IntVect(this->m_amr_ref_ratio[amrlev-1])); + IntVect(this->m_amr_ref_ratio[amrlev-1]), + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); Vector > bclohi (ncomp,Array{{AMREX_D_DECL(BCType::Dirichlet, @@ -544,7 +558,9 @@ MLCellLinOpT::setLevelBC (int amrlev, const MF* a_levelbcdata, const MF* rob m_crse_sol_br[amrlev]->setVal(RT(0.0)); } m_bndry_sol[amrlev]->setBndryValues(*m_crse_sol_br[amrlev], 0, - bcdata, 0, 0, ncomp, br_ref_ratio); + bcdata, 0, 0, ncomp, br_ref_ratio, + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); br_ref_ratio = this->m_coarse_data_crse_ratio; } else @@ -639,7 +655,9 @@ MLCellLinOpT::updateSolBC (int amrlev, const MF& crse_bcdata) const m_crse_sol_br[amrlev]->copyFrom(crse_bcdata, 0, 0, 0, ncomp, this->m_geom[amrlev-1][0].periodicity()); m_bndry_sol[amrlev]->updateBndryValues(*m_crse_sol_br[amrlev], 0, 0, ncomp, - IntVect(this->m_amr_ref_ratio[amrlev-1])); + IntVect(this->m_amr_ref_ratio[amrlev-1]), + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); } template @@ -652,7 +670,9 @@ MLCellLinOpT::updateCorBC (int amrlev, const MF& crse_bcdata) const m_crse_cor_br[amrlev]->copyFrom(crse_bcdata, 0, 0, 0, ncomp, this->m_geom[amrlev-1][0].periodicity()); m_bndry_cor[amrlev]->updateBndryValues(*m_crse_cor_br[amrlev], 0, 0, ncomp, - IntVect(this->m_amr_ref_ratio[amrlev-1])); + IntVect(this->m_amr_ref_ratio[amrlev-1]), + InterpBndryDataT::IBD_max_order_DEF, + m_interpbndry_halfwidth); } template @@ -1210,6 +1230,15 @@ MLCellLinOpT::solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, MF::Xpay(resid, RT(-1.0), b, 0, 0, ncomp, IntVect(0)); } +template +void +MLCellLinOpT::prepareForFluxes (int amrlev, const MF* crse_bcdata) +{ + if (crse_bcdata != nullptr) { + updateSolBC(amrlev, *crse_bcdata); + } +} + template void MLCellLinOpT::correctionResidual (int amrlev, int mglev, MF& resid, MF& x, const MF& b, diff --git 
a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H index 8d461d3bb04..ce8859eae11 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H @@ -58,6 +58,8 @@ public: return std::string("curl of curl"); } + bool setUsePCG (bool flag) { return std::exchange(m_use_pcg, flag); } + void setLevelBC (int amrlev, const MF* levelbcdata, const MF* robinbc_a = nullptr, const MF* robinbc_b = nullptr, @@ -137,6 +139,7 @@ private: Vector>>>> m_lusolver; Vector,3>>> m_bcoefs; + bool m_use_pcg = false; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp index 87853eb37bb..5e797e9a5b3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp @@ -353,22 +353,36 @@ void MLCurlCurl::smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, auto* plusolver = m_lusolver[amrlev][mglev]->dataPtr(); ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) { - mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], + mlcurlcurl_gs4_lu(i,j,k,ex[bno],ey[bno],ez[bno], + rhsx[bno],rhsy[bno],rhsz[bno], #if (AMREX_SPACEDIM == 2) - b, + b, #endif - adxinv,color,*plusolver,dinfo,sinfo); + adxinv,color,*plusolver,dinfo,sinfo); }); } else { auto const& bcx = m_bcoefs[amrlev][mglev][0]->const_arrays(); auto const& bcy = m_bcoefs[amrlev][mglev][1]->const_arrays(); auto const& bcz = m_bcoefs[amrlev][mglev][2]->const_arrays(); - ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) - { + if (m_use_pcg) { + ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + { - mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], - adxinv,color,bcx[bno],bcy[bno],bcz[bno],dinfo,sinfo); - }); + mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno], + rhsx[bno],rhsy[bno],rhsz[bno], + adxinv,color,bcx[bno],bcy[bno],bcz[bno], + dinfo,sinfo); + }); + } else { + ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + { + + mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno], + rhsx[bno],rhsy[bno],rhsz[bno], + adxinv,color,bcx[bno],bcy[bno],bcz[bno], + dinfo,sinfo); + }); + } } Gpu::streamSynchronize(); } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H index 0c1118f7dd3..e243b245f51 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H @@ -4,6 +4,7 @@ #include #include +#include namespace amrex { @@ -427,20 +428,20 @@ void mlcurlcurl_adotx_z (int i, int j, int k, Array4 const& Az, } AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void mlcurlcurl_gs4 (int i, int j, int k, - Array4 const& ex, - Array4 const& ey, - Array4 const& ez, - Array4 const& rhsx, - Array4 const& rhsy, - Array4 const& rhsz, +void mlcurlcurl_gs4_lu (int i, int j, int k, + Array4 const& ex, + Array4 const& ey, + Array4 const& ez, + Array4 const& rhsx, + Array4 const& rhsy, + Array4 const& rhsz, #if (AMREX_SPACEDIM == 2) - Real beta, + Real beta, #endif - GpuArray const& adxinv, - int color, LUSolver const& lusolver, - CurlCurlDirichletInfo const& dinfo, - CurlCurlSymmetryInfo const& sinfo) + GpuArray const& adxinv, + int color, LUSolver const& lusolver, + CurlCurlDirichletInfo const& dinfo, + CurlCurlSymmetryInfo const& sinfo) { if (dinfo.is_dirichlet_node(i,j,k)) { return; } @@ -598,6 +599,7 @@ void mlcurlcurl_gs4 (int i, int j, int k, #endif } +template AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlcurlcurl_gs4 (int i, int 
j, int k, Array4 const& ex, @@ -661,55 +663,81 @@ void mlcurlcurl_gs4 (int i, int j, int k, + dxy * (-ex(i-1,j+1,k ) +ex(i ,j+1,k )))}; - GpuArray x; + GpuArray beta; if (sinfo.xlo_is_symmetric(i)) { b[0] = -b[1]; - x[0] = x[1] = betax(i,j,k); + beta[0] = beta[1] = betax(i,j,k); } else if (sinfo.xhi_is_symmetric(i)) { b[1] = -b[0]; - x[0] = x[1] = betax(i-1,j,k); + beta[0] = beta[1] = betax(i-1,j,k); } else { - x[0] = betax(i-1,j,k); - x[1] = betax(i ,j,k); + beta[0] = betax(i-1,j,k); + beta[1] = betax(i ,j,k); } if (sinfo.ylo_is_symmetric(j)) { b[2] = -b[3]; - x[2] = x[3] = betay(i,j,k); + beta[2] = beta[3] = betay(i,j,k); } else if (sinfo.yhi_is_symmetric(j)) { b[3] = -b[2]; - x[2] = x[3] = betay(i,j-1,k); + beta[2] = beta[3] = betay(i,j-1,k); } else { - x[2] = betay(i,j-1,k); - x[3] = betay(i,j ,k); + beta[2] = betay(i,j-1,k); + beta[3] = betay(i,j ,k); } - LUSolver<4,Real> lusolver - ({dyy*Real(2.0) + x[0], - Real(0.0), - -dxy, - dxy, - // - Real(0.0), - dyy*Real(2.0) + x[1], - dxy, - -dxy, - // - -dxy, - dxy, - dxx*Real(2.0) + x[2], - Real(0.0), - // - dxy, - -dxy, - Real(0.0), - dxx*Real(2.0) + x[3]}); - lusolver(x.data(), b.data()); - ex(i-1,j ,k ) = x[0]; - ex(i ,j ,k ) = x[1]; - ey(i ,j-1,k ) = x[2]; - ey(i ,j ,k ) = x[3]; + if constexpr (PCG) { + Real diagInv[4] = {Real(1.0) / (dyy*Real(2.0) + beta[0]), + Real(1.0) / (dyy*Real(2.0) + beta[1]), + Real(1.0) / (dxx*Real(2.0) + beta[2]), + Real(1.0) / (dxx*Real(2.0) + beta[3])}; + auto precond = [&] (Real * AMREX_RESTRICT z, + Real const* AMREX_RESTRICT r) + { + for (int m = 0; m < 4; ++m) { z[m] = r[m] * diagInv[m]; } + }; + auto mat = [&] (Real * AMREX_RESTRICT Av, + Real const* AMREX_RESTRICT v) + { + Av[0] = (dyy*Real(2.0) + beta[0]) * v[0] - dxy * v[2] + dxy * v[3]; + Av[1] = (dyy*Real(2.0) + beta[1]) * v[1] + dxy * v[2] - dxy * v[3]; + Av[2] = -dxy * v[0] + dxy * v[1] + (dxx*Real(2.0) + beta[2]) * v[2]; + Av[3] = dxy * v[0] - dxy * v[1] + (dxx*Real(2.0) + beta[3]) * v[3]; + }; + Real sol[4] = {0, 0, 0, 0}; + pcg_solve<4>(sol, b.data(), mat, precond, 8, Real(1.e-8)); + ex(i-1,j ,k ) = sol[0]; + ex(i ,j ,k ) = sol[1]; + ey(i ,j-1,k ) = sol[2]; + ey(i ,j ,k ) = sol[3]; + } else { + LUSolver<4,Real> lusolver + ({dyy*Real(2.0) + beta[0], + Real(0.0), + -dxy, + dxy, + // + Real(0.0), + dyy*Real(2.0) + beta[1], + dxy, + -dxy, + // + -dxy, + dxy, + dxx*Real(2.0) + beta[2], + Real(0.0), + // + dxy, + -dxy, + Real(0.0), + dxx*Real(2.0) + beta[3]}); + lusolver(beta.data(), b.data()); + ex(i-1,j ,k ) = beta[0]; + ex(i ,j ,k ) = beta[1]; + ey(i ,j-1,k ) = beta[2]; + ey(i ,j ,k ) = beta[3]; + } #else @@ -772,90 +800,128 @@ void mlcurlcurl_gs4 (int i, int j, int k, + dyz * (-ey(i ,j-1,k+1) +ey(i ,j ,k+1)))}; - GpuArray x; + GpuArray beta; if (sinfo.xlo_is_symmetric(i)) { b[0] = -b[1]; - x[0] = x[1] = betax(i,j,k); + beta[0] = beta[1] = betax(i,j,k); } else if (sinfo.xhi_is_symmetric(i)) { b[1] = -b[0]; - x[0] = x[1] = betax(i-1,j,k); + beta[0] = beta[1] = betax(i-1,j,k); } else { - x[0] = betax(i-1,j,k); - x[1] = betax(i ,j,k); + beta[0] = betax(i-1,j,k); + beta[1] = betax(i ,j,k); } if (sinfo.ylo_is_symmetric(j)) { b[2] = -b[3]; - x[2] = x[3] = betay(i,j,k); + beta[2] = beta[3] = betay(i,j,k); } else if (sinfo.yhi_is_symmetric(j)) { b[3] = -b[2]; - x[2] = x[3] = betay(i,j-1,k); + beta[2] = beta[3] = betay(i,j-1,k); } else { - x[2] = betay(i,j-1,k); - x[3] = betay(i,j ,k); + beta[2] = betay(i,j-1,k); + beta[3] = betay(i,j ,k); } if (sinfo.zlo_is_symmetric(k)) { b[4] = -b[5]; - x[4] = x[5] = betaz(i,j,k); + beta[4] = beta[5] = betaz(i,j,k); } 
else if (sinfo.zhi_is_symmetric(k)) { b[5] = -b[4]; - x[4] = x[5] = betaz(i,j,k-1); + beta[4] = beta[5] = betaz(i,j,k-1); } else { - x[4] = betaz(i,j,k-1); - x[5] = betaz(i,j,k ); + beta[4] = betaz(i,j,k-1); + beta[5] = betaz(i,j,k ); } - LUSolver<6,Real> lusolver - ({(dyy+dzz)*Real(2.0) + x[0], - Real(0.0), - -dxy, - dxy, - -dxz, - dxz, - // - Real(0.0), - (dyy+dzz)*Real(2.0) + x[1], - dxy, - -dxy, - dxz, - -dxz, - // - -dxy, - dxy, - (dxx+dzz)*Real(2.0) + x[2], - Real(0.0), - -dyz, - dyz, - // - dxy, - -dxy, - Real(0.0), - (dxx+dzz)*Real(2.0) + x[3], - dyz, - -dyz, - // - -dxz, - dxz, - -dyz, - dyz, - (dxx+dyy)*Real(2.0) + x[4], - Real(0.0), - // - dxz, - -dxz, - dyz, - -dyz, - Real(0.0), - (dxx+dyy)*Real(2.0) + x[5]}); - lusolver(x.data(), b.data()); - ex(i-1,j ,k ) = x[0]; - ex(i ,j ,k ) = x[1]; - ey(i ,j-1,k ) = x[2]; - ey(i ,j ,k ) = x[3]; - ez(i ,j ,k-1) = x[4]; - ez(i ,j ,k ) = x[5]; + if constexpr (PCG) { + Real diagInv[6] = {Real(1.0) / ((dyy+dzz)*Real(2.0) + beta[0]), + Real(1.0) / ((dyy+dzz)*Real(2.0) + beta[1]), + Real(1.0) / ((dxx+dzz)*Real(2.0) + beta[2]), + Real(1.0) / ((dxx+dzz)*Real(2.0) + beta[3]), + Real(1.0) / ((dxx+dyy)*Real(2.0) + beta[4]), + Real(1.0) / ((dxx+dyy)*Real(2.0) + beta[5])}; + auto precond = [&] (Real * AMREX_RESTRICT z, + Real const* AMREX_RESTRICT r) + { + for (int m = 0; m < 6; ++m) { z[m] = r[m] * diagInv[m]; } + }; + auto mat = [&] (Real * AMREX_RESTRICT Av, + Real const* AMREX_RESTRICT v) + { + Av[0] = ((dyy+dzz)*Real(2.0) + beta[0]) * v[0] - dxy * v[2] + + dxy * v[3] - dxz * v[4] + dxz * v[5]; + Av[1] = ((dyy+dzz)*Real(2.0) + beta[1]) * v[1] + dxy * v[2] + - dxy * v[3] + dxz * v[4] - dxz * v[5]; + Av[2] = -dxy * v[0] + dxy * v[1] + ((dxx+dzz)*Real(2.0) + beta[2]) * v[2] + - dyz * v[4] + dyz * v[5]; + Av[3] = dxy * v[0] - dxy * v[1] + ((dxx+dzz)*Real(2.0) + beta[3]) * v[3] + + dyz * v[4] - dyz * v[5]; + Av[4] = -dxz * v[0] + dxz * v[1] - dyz * v[2] + dyz * v[3] + + ((dxx+dyy)*Real(2.0) + beta[4]) * v[4]; + Av[5] = dxz * v[0] - dxz * v[1] + dyz * v[2] - dyz * v[3] + + ((dxx+dyy)*Real(2.0) + beta[5]) * v[5]; + }; + Real sol[6] = {0, 0, 0, 0, 0, 0}; + pcg_solve<6>(sol, b.data(), mat, precond, 8, Real(1.e-8)); + ex(i-1,j ,k ) = sol[0]; + ex(i ,j ,k ) = sol[1]; + ey(i ,j-1,k ) = sol[2]; + ey(i ,j ,k ) = sol[3]; + ez(i ,j ,k-1) = sol[4]; + ez(i ,j ,k ) = sol[5]; + } else { + LUSolver<6,Real> lusolver + ({(dyy+dzz)*Real(2.0) + beta[0], + Real(0.0), + -dxy, + dxy, + -dxz, + dxz, + // + Real(0.0), + (dyy+dzz)*Real(2.0) + beta[1], + dxy, + -dxy, + dxz, + -dxz, + // + -dxy, + dxy, + (dxx+dzz)*Real(2.0) + beta[2], + Real(0.0), + -dyz, + dyz, + // + dxy, + -dxy, + Real(0.0), + (dxx+dzz)*Real(2.0) + beta[3], + dyz, + -dyz, + // + -dxz, + dxz, + -dyz, + dyz, + (dxx+dyy)*Real(2.0) + beta[4], + Real(0.0), + // + dxz, + -dxz, + dyz, + -dyz, + Real(0.0), + (dxx+dyy)*Real(2.0) + beta[5]}); + lusolver(beta.data(), b.data()); + ex(i-1,j ,k ) = beta[0]; + ex(i ,j ,k ) = beta[1]; + ey(i ,j-1,k ) = beta[2]; + ey(i ,j ,k ) = beta[3]; + ez(i ,j ,k-1) = beta[4]; + ez(i ,j ,k ) = beta[5]; + } #endif } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_1D_K.H index b9e9984f8d1..8f8c9dabfc4 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_1D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_1D_K.H @@ -17,6 +17,21 @@ void mlebndfdlap_gsrb (int /*i*/, int /*j*/, int /*k*/, Array4 const& /*x* { } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx (int /*i*/, int /*j*/, int /*k*/, Array4 const& 
/*y*/, + Array4 const& /*x*/, Array4 const& /*dmsk*/, + Array4 const& /* sig */, Real /*bx*/) noexcept +{ +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_gsrb (int /*i*/, int /*j*/, int /*k*/, Array4 const& /*x*/, + Array4 const& /*rhs*/, Array4 const& /*dmsk*/, + Array4 const& /* sig */, + Real /*bx*/, int /*redblack*/) noexcept +{ +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H index c23797f8f7b..57bf89bba29 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H @@ -422,6 +422,214 @@ void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx (int i, int j, int k, Array4 const& y, + Array4 const& x, + Array4 const& dmsk, + Array4 const& sig, + Real bx, Real by) noexcept +{ + if (dmsk(i,j,k)) { + y(i,j,k) = Real(0.0); + } else { + Real sigxm = Real(0.5)*(sig(i-1,j-1,k)+sig(i-1,j ,k)); + Real sigxp = Real(0.5)*(sig(i ,j-1,k)+sig(i ,j ,k)); + Real sigym = Real(0.5)*(sig(i-1,j-1,k)+sig(i ,j-1,k)); + Real sigyp = Real(0.5)*(sig(i-1,j ,k)+sig(i ,j ,k)); + y(i,j,k) = bx * (sigxm*x(i-1,j,k) + sigxp*x(i+1,j,k)) + + by * (sigym*x(i,j-1,k) + sigyp*x(i,j+1,k)) + - (bx*(sigxm+sigxp) + by*(sigym+sigyp)) * x(i,j,k); + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_gsrb (int i, int j, int k, Array4 const& x, + Array4 const& rhs, + Array4 const& dmsk, + Array4 const& sig, + Real bx, Real by, int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + if (dmsk(i,j,k)) { + x(i,j,k) = Real(0.); + } else { + Real sigxm = Real(0.5)*(sig(i-1,j-1,k)+sig(i-1,j ,k)); + Real sigxp = Real(0.5)*(sig(i ,j-1,k)+sig(i ,j ,k)); + Real sigym = Real(0.5)*(sig(i-1,j-1,k)+sig(i ,j-1,k)); + Real sigyp = Real(0.5)*(sig(i-1,j ,k)+sig(i ,j ,k)); + Real gamma = -(bx*(sigxm+sigxp) + by*(sigym+sigyp)); + Real Ax = bx * (sigxm*x(i-1,j,k) + sigxp*x(i+1,j,k)) + + by * (sigym*x(i,j-1,k) + sigyp*x(i,j+1,k)) + + gamma * x(i,j,k); + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / gamma); + } + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx_eb_doit (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, Array4 const& ecy, + Array4 const& sig, Array4 const& vfrc, + F const& xeb, Real bx, Real by) noexcept +{ + if (dmsk(i,j,k)) { + y(i,j,k) = Real(0.0); + } else { + Real tmp, sigma; + Real hp, hm, scale, out; + + sigma = (sig(i,j-1,k)*vfrc(i,j-1,k) + sig(i,j,k)*vfrc(i,j,k)) + / (vfrc(i,j-1,k) + vfrc(i,j,k)); + hp = (ecx(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.)*ecx(i,j,k)); + if (levset(i+1,j,k) < Real(0.0)) { + tmp = sigma*(x(i+1,j,k) - x(i,j,k)); + } else { + tmp = sigma*((xeb(i+1,j,k) - x(i,j,k)) / hp); + } + + sigma = (sig(i-1,j-1,k)*vfrc(i-1,j-1,k) + sig(i-1,j,k)*vfrc(i-1,j,k)) + / (vfrc(i-1,j-1,k) + vfrc(i-1,j,k)); + hm = (ecx(i-1,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecx(i-1,j,k)); + if (levset(i-1,j,k) < Real(0.0)) { + tmp += sigma*(x(i-1,j,k) - x(i,j,k)); + } else { + tmp += sigma*((xeb(i-1,j,k) - x(i,j,k)) / hm); + } + + out = tmp * bx * Real(2.0) / (hp+hm); + scale = amrex::min(hm, hp); + + sigma = (sig(i-1,j,k)*vfrc(i-1,j,k) + sig(i,j,k)*vfrc(i,j,k)) + / (vfrc(i-1,j,k) + vfrc(i,j,k)); + hp = (ecy(i,j,k) == Real(1.0)) ? 
Real(1.0) : (Real(1.0)+Real(2.)*ecy(i,j,k)); + if (levset(i,j+1,k) < Real(0.0)) { + tmp = sigma*(x(i,j+1,k) - x(i,j,k)); + } else { + tmp = sigma*((xeb(i,j+1,k) - x(i,j,k)) / hp); + } + + sigma = (sig(i-1,j-1,k)*vfrc(i-1,j-1,k) + sig(i,j-1,k)*vfrc(i,j-1,k)) + / (vfrc(i-1,j-1,k) + vfrc(i,j-1,k)); + hm = (ecy(i,j-1,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecy(i,j-1,k)); + if (levset(i,j-1,k) < Real(0.0)) { + tmp += sigma*(x(i,j-1,k) - x(i,j,k)); + } else { + tmp += sigma*((xeb(i,j-1,k) - x(i,j,k)) / hm); + } + + out += tmp * by * Real(2.0) / (hp+hm); + scale = amrex::min(scale, hm, hp); + + y(i,j,k) = out*scale; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx_eb (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, Array4 const& ecy, + Array4 const& sig, Array4 const& vfrc, + Real xeb, Real bx, Real by) noexcept +{ + mlebndfdlap_sig_adotx_eb_doit(i, j, k, y, x, levset, dmsk, ecx, ecy, sig, vfrc, + [=] (int, int, int) -> Real { return xeb; }, + bx, by); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx_eb (int i, int j, int k, Array4 const& y, + Array4 const& x, Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, Array4 const& ecy, + Array4 const& sig, Array4 const& vfrc, + Array4 const& xeb, Real bx, Real by) noexcept +{ + mlebndfdlap_sig_adotx_eb_doit(i, j, k, y, x, levset, dmsk, ecx, ecy, sig, vfrc, + [=] (int i1, int i2, int i3) -> Real { + return xeb(i1,i2,i3); }, + bx, by); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_gsrb_eb (int i, int j, int k, Array4 const& x, + Array4 const& rhs, + Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, + Array4 const& ecy, + Array4 const& sig, + Array4 const& vfrc, + Real bx, Real by, int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + if (dmsk(i,j,k)) { + x(i,j,k) = Real(0.); + } else { + Real tmp0, tmp1, sigma; + Real hp, hm, scale; + + sigma = (sig(i,j-1,k)*vfrc(i,j-1,k) + sig(i,j,k)*vfrc(i,j,k)) + / (vfrc(i,j-1,k) + vfrc(i,j,k)); + hp = (ecx(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.)*ecx(i,j,k)); + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp0 = sigma*Real(-1.0); + tmp1 = sigma*x(i+1,j,k); + } else { + tmp0 = sigma*Real(-1.0) / hp; + tmp1 = Real(0.0); + } + + sigma = (sig(i-1,j-1,k)*vfrc(i-1,j-1,k) + sig(i-1,j,k)*vfrc(i-1,j,k)) + / (vfrc(i-1,j-1,k) + vfrc(i-1,j,k)); + hm = (ecx(i-1,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecx(i-1,j,k)); + if (levset(i-1,j,k) < Real(0.0)) { + tmp0 += sigma*Real(-1.0); + tmp1 += sigma*x(i-1,j,k); + } else { + tmp0 += sigma*Real(-1.0) / hm; + } + + Real gamma = tmp0 * (bx * Real(2.0) / (hp+hm)); + Real rho = tmp1 * (bx * Real(2.0) / (hp+hm)); + scale = amrex::min(hm, hp); + + sigma = (sig(i-1,j,k)*vfrc(i-1,j,k) + sig(i,j,k)*vfrc(i,j,k)) + / (vfrc(i-1,j,k) + vfrc(i,j,k)); + hp = (ecy(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.)*ecy(i,j,k)); + if (levset(i,j+1,k) < Real(0.0)) { + tmp0 = sigma*Real(-1.0); + tmp1 = sigma*x(i,j+1,k); + } else { + tmp0 = sigma*Real(-1.0) / hp; + tmp1 = Real(0.0); + } + + sigma = (sig(i-1,j-1,k)*vfrc(i-1,j-1,k) + sig(i,j-1,k)*vfrc(i,j-1,k)) + / (vfrc(i-1,j-1,k) + vfrc(i,j-1,k)); + hm = (ecy(i,j-1,k) == Real(1.0)) ? 
Real(1.0) : (Real(1.0)-Real(2.)*ecy(i,j-1,k)); + if (levset(i,j-1,k) < Real(0.0)) { + tmp0 += sigma*Real(-1.0); + tmp1 += sigma*x(i,j-1,k); + } else { + tmp0 += sigma*Real(-1.0) / hm; + } + + gamma += tmp0 * (by * Real(2.0) / (hp+hm)); + rho += tmp1 * (by * Real(2.0) / (hp+hm)); + scale = amrex::min(scale, hm, hp); + + Real Ax = rho + gamma*x(i,j,k); + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax*scale) * (omega / (gamma*scale)); + } + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H index bebaa33bbc2..9b7fc0fc2bd 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H @@ -236,6 +236,389 @@ void mlebndfdlap_gsrb (int i, int j, int k, Array4 const& x, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx (int i, int j, int k, Array4 const& y, + Array4 const& x, + Array4 const& dmsk, + Array4 const& sig, + Real bx, Real by, Real bz) noexcept +{ + if (dmsk(i,j,k)) { + y(i,j,k) = Real(0.0); + } else { + Real sigxm = Real(0.25)*(sig(i-1,j-1,k-1) + + sig(i-1,j ,k-1) + + sig(i-1,j-1,k ) + + sig(i-1,j ,k )); + Real sigxp = Real(0.25)*(sig(i ,j-1,k-1) + + sig(i ,j ,k-1) + + sig(i ,j-1,k ) + + sig(i ,j ,k )); + Real sigym = Real(0.25)*(sig(i-1,j-1,k-1) + + sig(i ,j-1,k-1) + + sig(i-1,j-1,k ) + + sig(i ,j-1,k )); + Real sigyp = Real(0.25)*(sig(i-1,j ,k-1) + + sig(i ,j ,k-1) + + sig(i-1,j ,k ) + + sig(i ,j ,k )); + Real sigzm = Real(0.25)*(sig(i-1,j-1,k-1) + + sig(i ,j-1,k-1) + + sig(i-1,j ,k-1) + + sig(i ,j ,k-1)); + Real sigzp = Real(0.25)*(sig(i-1,j-1,k ) + + sig(i ,j-1,k ) + + sig(i-1,j ,k ) + + sig(i ,j ,k )); + y(i,j,k) = bx * (sigxm*x(i-1,j,k) + sigxp*x(i+1,j,k)) + + by * (sigym*x(i,j-1,k) + sigyp*x(i,j+1,k)) + + bz * (sigzm*x(i,j,k-1) + sigzp*x(i,j,k+1)) + - (bx*(sigxm+sigxp) + by*(sigym+sigyp) + bz*(sigzm+sigzp)) * x(i,j,k); + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_gsrb (int i, int j, int k, Array4 const& x, + Array4 const& rhs, + Array4 const& dmsk, + Array4 const& sig, + Real bx, Real by, Real bz, int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + if (dmsk(i,j,k)) { + x(i,j,k) = Real(0.); + } else { + Real sigxm = Real(0.25)*(sig(i-1,j-1,k-1) + + sig(i-1,j ,k-1) + + sig(i-1,j-1,k ) + + sig(i-1,j ,k )); + Real sigxp = Real(0.25)*(sig(i ,j-1,k-1) + + sig(i ,j ,k-1) + + sig(i ,j-1,k ) + + sig(i ,j ,k )); + Real sigym = Real(0.25)*(sig(i-1,j-1,k-1) + + sig(i ,j-1,k-1) + + sig(i-1,j-1,k ) + + sig(i ,j-1,k )); + Real sigyp = Real(0.25)*(sig(i-1,j ,k-1) + + sig(i ,j ,k-1) + + sig(i-1,j ,k ) + + sig(i ,j ,k )); + Real sigzm = Real(0.25)*(sig(i-1,j-1,k-1) + + sig(i ,j-1,k-1) + + sig(i-1,j ,k-1) + + sig(i ,j ,k-1)); + Real sigzp = Real(0.25)*(sig(i-1,j-1,k ) + + sig(i ,j-1,k ) + + sig(i-1,j ,k ) + + sig(i ,j ,k )); + Real gamma = -(bx*(sigxm+sigxp) + by*(sigym+sigyp) + bz*(sigzm+sigzp)); + Real Ax = bx * (sigxm*x(i-1,j,k) + sigxp*x(i+1,j,k)) + + by * (sigym*x(i,j-1,k) + sigyp*x(i,j+1,k)) + + bz * (sigzm*x(i,j,k-1) + sigzp*x(i,j,k+1)) + + gamma * x(i,j,k); + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / gamma); + } + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx_eb_doit (int i, int j, int k, Array4 const& y, + Array4 const& x, + Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, + Array4 const& ecy, + Array4 const& ecz, + Array4 const& sig, + Array4 const& vfrc, + F const& xeb, + Real bx, 
Real by, Real bz) noexcept +{ + if (dmsk(i,j,k)) { + y(i,j,k) = Real(0.0); + } else { + Real tmp, sigma; + Real hp, hm, scale, out; + + sigma = (sig(i ,j-1,k-1) * vfrc(i ,j-1,k-1) + + sig(i ,j ,k-1) * vfrc(i ,j ,k-1) + + sig(i ,j-1,k ) * vfrc(i ,j-1,k ) + + sig(i ,j ,k ) * vfrc(i ,j ,k )) + / ( vfrc(i ,j-1,k-1) + + vfrc(i ,j ,k-1) + + vfrc(i ,j-1,k ) + + vfrc(i ,j ,k )); + hp = (ecx(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.)*ecx(i,j,k)); + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp = sigma*(x(i+1,j,k) - x(i,j,k)); + } else { + tmp = sigma*((xeb(i+1,j,k) - x(i,j,k)) / hp); + } + + sigma = (sig(i-1,j-1,k-1) * vfrc(i-1,j-1,k-1) + + sig(i-1,j ,k-1) * vfrc(i-1,j ,k-1) + + sig(i-1,j-1,k ) * vfrc(i-1,j-1,k ) + + sig(i-1,j ,k ) * vfrc(i-1,j ,k )) + / ( vfrc(i-1,j-1,k-1) + + vfrc(i-1,j ,k-1) + + vfrc(i-1,j-1,k ) + + vfrc(i-1,j ,k )); + hm = (ecx(i-1,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecx(i-1,j,k)); + if (levset(i-1,j,k) < Real(0.0)) { + tmp += sigma*(x(i-1,j,k) - x(i,j,k)); + } else { + tmp += sigma*((xeb(i-1,j,k) - x(i,j,k)) / hm); + } + + out = tmp * bx * Real(2.0) / (hp+hm); + scale = amrex::min(hm, hp); + + sigma = (sig(i-1,j ,k-1) * vfrc(i-1,j ,k-1) + + sig(i ,j ,k-1) * vfrc(i ,j ,k-1) + + sig(i-1,j ,k ) * vfrc(i-1,j ,k ) + + sig(i ,j ,k ) * vfrc(i ,j ,k )) + / ( vfrc(i-1,j ,k-1) + + vfrc(i ,j ,k-1) + + vfrc(i-1,j ,k ) + + vfrc(i ,j ,k )); + hp = (ecy(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.)*ecy(i,j,k)); + if (levset(i,j+1,k) < Real(0.0)) { + tmp = sigma*(x(i,j+1,k) - x(i,j,k)); + } else { + tmp = sigma*((xeb(i,j+1,k) - x(i,j,k)) / hp); + } + + sigma = (sig(i-1,j-1,k-1) * vfrc(i-1,j-1,k-1) + + sig(i ,j-1,k-1) * vfrc(i ,j-1,k-1) + + sig(i-1,j-1,k ) * vfrc(i-1,j-1,k ) + + sig(i ,j-1,k ) * vfrc(i ,j-1,k )) + / ( vfrc(i-1,j-1,k-1) + + vfrc(i ,j-1,k-1) + + vfrc(i-1,j-1,k ) + + vfrc(i ,j-1,k )); + hm = (ecy(i,j-1,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecy(i,j-1,k)); + if (levset(i,j-1,k) < Real(0.0)) { + tmp += sigma*(x(i,j-1,k) - x(i,j,k)); + } else { + tmp += sigma*((xeb(i,j-1,k) - x(i,j,k)) / hm); + } + + out += tmp * by * Real(2.0) / (hp+hm); + scale = amrex::min(scale, hm, hp); + + sigma = (sig(i-1,j-1,k ) * vfrc(i-1,j-1,k ) + + sig(i ,j-1,k ) * vfrc(i ,j-1,k ) + + sig(i-1,j ,k ) * vfrc(i-1,j ,k ) + + sig(i ,j ,k ) * vfrc(i ,j ,k )) + / ( vfrc(i-1,j-1,k ) + + vfrc(i ,j-1,k ) + + vfrc(i-1,j ,k ) + + vfrc(i ,j ,k )); + hp = (ecz(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.0)*ecz(i,j,k)); + if (levset(i,j,k+1) < Real(0.0)) { + tmp = sigma*(x(i,j,k+1) - x(i,j,k)); + } else { + tmp = sigma*((xeb(i,j,k+1) - x(i,j,k)) / hp); + } + + sigma = (sig(i-1,j-1,k-1) * vfrc(i-1,j-1,k-1) + + sig(i ,j-1,k-1) * vfrc(i ,j-1,k-1) + + sig(i-1,j ,k-1) * vfrc(i-1,j ,k-1) + + sig(i ,j ,k-1) * vfrc(i ,j ,k-1)) + / ( vfrc(i-1,j-1,k-1) + + vfrc(i ,j-1,k-1) + + vfrc(i-1,j ,k-1) + + vfrc(i ,j ,k-1)); + hm = (ecz(i,j,k-1) == Real(1.0)) ? 
Real(1.0) : (Real(1.0)-Real(2.)*ecz(i,j,k-1)); + if (levset(i,j,k-1) < Real(0.0)) { + tmp += sigma*(x(i,j,k-1) - x(i,j,k)); + } else { + tmp += sigma*((xeb(i,j,k-1) - x(i,j,k)) / hm); + } + + out += tmp * bz * Real(2.0) / (hp+hm); + scale = amrex::min(scale, hm, hp); + + y(i,j,k) = out*scale; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx_eb (int i, int j, int k, Array4 const& y, + Array4 const& x, + Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, + Array4 const& ecy, + Array4 const& ecz, + Array4 const& sig, + Array4 const& vfrc, + Real xeb, Real bx, Real by, Real bz) noexcept +{ + mlebndfdlap_sig_adotx_eb_doit(i, j, k, y, x, levset, dmsk, ecx, ecy, ecz, sig, vfrc, + [=] (int, int, int) -> Real { return xeb; }, + bx, by, bz); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_adotx_eb (int i, int j, int k, Array4 const& y, + Array4 const& x, + Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, + Array4 const& ecy, + Array4 const& ecz, + Array4 const& sig, + Array4 const& vfrc, + Array4 const& xeb, + Real bx, Real by, Real bz) noexcept +{ + mlebndfdlap_sig_adotx_eb_doit(i, j, k, y, x, levset, dmsk, ecx, ecy, ecz, sig, vfrc, + [=] (int i1, int i2, int i3) -> Real { + return xeb(i1,i2,i3); }, + bx, by, bz); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebndfdlap_sig_gsrb_eb (int i, int j, int k, Array4 const& x, + Array4 const& rhs, + Array4 const& levset, + Array4 const& dmsk, + Array4 const& ecx, + Array4 const& ecy, + Array4 const& ecz, + Array4 const& sig, + Array4 const& vfrc, + Real bx, Real by, Real bz, int redblack) noexcept +{ + if ((i+j+k+redblack)%2 == 0) { + if (dmsk(i,j,k)) { + x(i,j,k) = Real(0.); + } else { + Real tmp0, tmp1, sigma; + Real hp, hm, scale; + + sigma = (sig(i ,j-1,k-1) * vfrc(i ,j-1,k-1) + + sig(i ,j ,k-1) * vfrc(i ,j ,k-1) + + sig(i ,j-1,k ) * vfrc(i ,j-1,k ) + + sig(i ,j ,k ) * vfrc(i ,j ,k )) + / ( vfrc(i ,j-1,k-1) + + vfrc(i ,j ,k-1) + + vfrc(i ,j-1,k ) + + vfrc(i ,j ,k )); + hp = (ecx(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.)*ecx(i,j,k)); + if (levset(i+1,j,k) < Real(0.0)) { // regular + tmp0 = sigma*Real(-1.0); + tmp1 = sigma*x(i+1,j,k); + } else { + tmp0 = sigma*Real(-1.0) / hp; + tmp1 = Real(0.0); + } + + sigma = (sig(i-1,j-1,k-1) * vfrc(i-1,j-1,k-1) + + sig(i-1,j ,k-1) * vfrc(i-1,j ,k-1) + + sig(i-1,j-1,k ) * vfrc(i-1,j-1,k ) + + sig(i-1,j ,k ) * vfrc(i-1,j ,k )) + / ( vfrc(i-1,j-1,k-1) + + vfrc(i-1,j ,k-1) + + vfrc(i-1,j-1,k ) + + vfrc(i-1,j ,k )); + hm = (ecx(i-1,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecx(i-1,j,k)); + if (levset(i-1,j,k) < Real(0.0)) { + tmp0 += sigma*Real(-1.0); + tmp1 += sigma*x(i-1,j,k); + } else { + tmp0 += sigma*Real(-1.0) / hm; + } + + Real gamma = tmp0 * (bx * Real(2.0) / (hp+hm)); + Real rho = tmp1 * (bx * Real(2.0) / (hp+hm)); + scale = amrex::min(hm, hp); + + sigma = (sig(i-1,j ,k-1) * vfrc(i-1,j ,k-1) + + sig(i ,j ,k-1) * vfrc(i ,j ,k-1) + + sig(i-1,j ,k ) * vfrc(i-1,j ,k ) + + sig(i ,j ,k ) * vfrc(i ,j ,k )) + / ( vfrc(i-1,j ,k-1) + + vfrc(i ,j ,k-1) + + vfrc(i-1,j ,k ) + + vfrc(i ,j ,k )); + hp = (ecy(i,j,k) == Real(1.0)) ? 
Real(1.0) : (Real(1.0)+Real(2.)*ecy(i,j,k)); + if (levset(i,j+1,k) < Real(0.0)) { + tmp0 = sigma*Real(-1.0); + tmp1 = sigma*x(i,j+1,k); + } else { + tmp0 = sigma*Real(-1.0) / hp; + tmp1 = Real(0.0); + } + + sigma = (sig(i-1,j-1,k-1) * vfrc(i-1,j-1,k-1) + + sig(i ,j-1,k-1) * vfrc(i ,j-1,k-1) + + sig(i-1,j-1,k ) * vfrc(i-1,j-1,k ) + + sig(i ,j-1,k ) * vfrc(i ,j-1,k )) + / ( vfrc(i-1,j-1,k-1) + + vfrc(i ,j-1,k-1) + + vfrc(i-1,j-1,k ) + + vfrc(i ,j-1,k )); + hm = (ecy(i,j-1,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecy(i,j-1,k)); + if (levset(i,j-1,k) < Real(0.0)) { + tmp0 += sigma*Real(-1.0); + tmp1 += sigma*x(i,j-1,k); + } else { + tmp0 += sigma*Real(-1.0) / hm; + } + + gamma += tmp0 * (by * Real(2.0) / (hp+hm)); + rho += tmp1 * (by * Real(2.0) / (hp+hm)); + scale = amrex::min(scale, hm, hp); + + sigma = (sig(i-1,j-1,k ) * vfrc(i-1,j-1,k ) + + sig(i ,j-1,k ) * vfrc(i ,j-1,k ) + + sig(i-1,j ,k ) * vfrc(i-1,j ,k ) + + sig(i ,j ,k ) * vfrc(i ,j ,k )) + / ( vfrc(i-1,j-1,k ) + + vfrc(i ,j-1,k ) + + vfrc(i-1,j ,k ) + + vfrc(i ,j ,k )); + hp = (ecz(i,j,k) == Real(1.0)) ? Real(1.0) : (Real(1.0)+Real(2.0)*ecz(i,j,k)); + if (levset(i,j,k+1) < Real(0.0)) { + tmp0 = sigma*Real(-1.0); + tmp1 = sigma*x(i,j,k+1); + } else { + tmp0 = sigma*Real(-1.0) / hp; + tmp1 = Real(0.0); + } + + sigma = (sig(i-1,j-1,k-1) * vfrc(i-1,j-1,k-1) + + sig(i ,j-1,k-1) * vfrc(i ,j-1,k-1) + + sig(i-1,j ,k-1) * vfrc(i-1,j ,k-1) + + sig(i ,j ,k-1) * vfrc(i ,j ,k-1)) + / ( vfrc(i-1,j-1,k-1) + + vfrc(i ,j-1,k-1) + + vfrc(i-1,j ,k-1) + + vfrc(i ,j ,k-1)); + hm = (ecz(i,j,k-1) == Real(1.0)) ? Real(1.0) : (Real(1.0)-Real(2.)*ecz(i,j,k-1)); + if (levset(i,j,k-1) < Real(0.0)) { + tmp0 += sigma*Real(-1.0); + tmp1 += sigma*x(i,j,k-1); + } else { + tmp0 += sigma*Real(-1.0) / hm; + } + + gamma += tmp0 * (bz * Real(2.0) / (hp+hm)); + rho += tmp1 * (bz * Real(2.0) / (hp+hm)); + scale = amrex::min(scale, hm, hp); + + Real Ax = rho + gamma*x(i,j,k); + constexpr Real omega = Real(1.25); + x(i,j,k) += (rhs(i,j,k) - Ax*scale) * (omega / (gamma*scale)); + } + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 6e7559d21d9..6ebbd2c65e7 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -20,6 +20,9 @@ namespace amrex { // // del dot (sigma grad phi) - alpha/r^2 phi = rhs, for RZ where alpha is a // scalar constant that is zero by default. +// +// New feature: for non-RZ, sigma can also be a single-component +// cell-centered multifab. 
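For reference, a minimal sketch of how the new cell-centered sigma overload might be called from application code. Everything other than the setSigma(0, sigma) call itself (the operator and MLMG setup, the Dirichlet boundary choice, and the helper name solve_with_cc_sigma) is illustrative boilerplate assumed for the example, and the overload currently supports a single AMR level only:

#include <AMReX_MLEBNodeFDLaplacian.H>
#include <AMReX_MLMG.H>

using namespace amrex;

// Sketch only: geom/grids/dmap and the node-centered phi/rhs are assumed to have been
// built already (after amrex::Initialize); the one call introduced by this patch is
// linop.setSigma(0, sigma).
void solve_with_cc_sigma (Geometry const& geom, BoxArray const& grids,
                          DistributionMapping const& dmap,
                          MultiFab& phi, MultiFab const& rhs)
{
    MLEBNodeFDLaplacian linop({geom}, {grids}, {dmap}, LPInfo{});
    linop.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet,
                                    LinOpBCType::Dirichlet,
                                    LinOpBCType::Dirichlet)},
                      {AMREX_D_DECL(LinOpBCType::Dirichlet,
                                    LinOpBCType::Dirichlet,
                                    LinOpBCType::Dirichlet)});

    // Single-component, cell-centered coefficient; a spatially varying field
    // would be filled here instead of a constant.
    MultiFab sigma(grids, dmap, 1, 0);
    sigma.setVal(1.0);
    linop.setSigma(0, sigma);   // new overload; single AMR level only

    MLMG mlmg(linop);
    mlmg.solve({&phi}, {&rhs}, Real(1.e-11), Real(0.0));
}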
class MLEBNodeFDLaplacian : public MLNodeLinOp @@ -50,6 +53,8 @@ public: void setSigma (Array const& a_sigma) noexcept; + void setSigma (int amrlev, MultiFab const& a_sigma); + void setRZ (bool flag); void setAlpha (Real a_alpha); @@ -116,6 +121,8 @@ public: private: GpuArray m_sigma{{AMREX_D_DECL(1_rt,1_rt,1_rt)}}; + Vector>> m_sigma_mf; + bool m_has_sigma_mf = false; Real m_s_phi_eb = std::numeric_limits::lowest(); Vector m_phi_eb; int m_rz = false; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index 5559f133821..af4a6a6d742 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -4,6 +4,11 @@ #include #include +#ifdef AMREX_USE_EB +#include +#include +#endif + namespace amrex { #ifdef AMREX_USE_EB @@ -35,6 +40,19 @@ MLEBNodeFDLaplacian::setSigma (Array const& a_sigma) noexce } } +void +MLEBNodeFDLaplacian::setSigma (int amrlev, MultiFab const& a_sigma) +{ + m_has_sigma_mf = true; + m_sigma_mf[amrlev][0] = std::make_unique + (this->m_grids[amrlev][0], this->m_dmap[amrlev][0], 1, 1, MFInfo{}, + *(this->m_factory[amrlev][0])); + MultiFab::Copy(*m_sigma_mf[amrlev][0], a_sigma, 0, 0, 1, 0); +#ifdef AMREX_USE_EB + amrex::EB_set_covered(*m_sigma_mf[amrlev][0], Real(0.0)); +#endif +} + void MLEBNodeFDLaplacian::setRZ (bool flag) // NOLINT { @@ -92,6 +110,11 @@ MLEBNodeFDLaplacian::define (const Vector& a_geom, int eb_limit_coarsening = true; m_coarsening_strategy = CoarseningStrategy::Sigma; // This will fill nodes outside Neumann BC MLNodeLinOp::define(a_geom, cc_grids, a_dmap, a_info, _factory, eb_limit_coarsening); + + m_sigma_mf.resize(this->m_num_amr_levels); + for (int ilev = 0; ilev < this->m_num_amr_levels; ++ilev) { + m_sigma_mf[ilev].resize(this->m_num_mg_levels[ilev]); + } } #endif @@ -118,16 +141,25 @@ MLEBNodeFDLaplacian::define (const Vector& a_geom, m_coarsening_strategy = CoarseningStrategy::Sigma; // This will fill nodes outside Neumann BC MLNodeLinOp::define(a_geom, cc_grids, a_dmap, a_info); + + m_sigma_mf.resize(this->m_num_amr_levels); + for (int ilev = 0; ilev < this->m_num_amr_levels; ++ilev) { + m_sigma_mf[ilev].resize(this->m_num_mg_levels[ilev]); + } } #ifdef AMREX_USE_EB std::unique_ptr > MLEBNodeFDLaplacian::makeFactory (int amrlev, int mglev) const { - return makeEBFabFactory(m_geom[amrlev][mglev], - m_grids[amrlev][mglev], - m_dmap[amrlev][mglev], - {1,1,1}, EBSupport::full); + if (EB2::TopIndexSpaceIfPresent()) { + return makeEBFabFactory(m_geom[amrlev][mglev], + m_grids[amrlev][mglev], + m_dmap[amrlev][mglev], + {1,1,1}, EBSupport::full); + } else { + return MLNodeLinOp::makeFactory(amrlev, mglev); + } } #endif @@ -138,7 +170,7 @@ MLEBNodeFDLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiF applyBC(amrlev, cmglev-1, fine, BCMode::Homogeneous, StateMode::Solution); - IntVect const ratio = mg_coarsen_ratio_vec[cmglev-1]; + IntVect const ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[cmglev-1]; int semicoarsening_dir = info.semicoarsening_direction; bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine); @@ -184,7 +216,7 @@ MLEBNodeFDLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, { BL_PROFILE("MLEBNodeFDLaplacian::interpolation()"); - IntVect const ratio = mg_coarsen_ratio_vec[fmglev]; + IntVect const ratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[fmglev]; int semicoarsening_dir = info.semicoarsening_direction; bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine); @@ -237,17 +269,19 @@ MLEBNodeFDLaplacian::prepareForSolve () for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - auto const& levset_mf = factory->getLevelSet(); - auto const& levset_ar = levset_mf.const_arrays(); - auto& dmask_mf = *m_dirichlet_mask[amrlev][mglev]; - auto const& dmask_ar = dmask_mf.arrays(); - amrex::ParallelFor(dmask_mf, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - if (levset_ar[box_no](i,j,k) >= Real(0.0)) { - dmask_ar[box_no](i,j,k) = -1; - } - }); + if (factory) { + auto const& levset_mf = factory->getLevelSet(); + auto const& levset_ar = levset_mf.const_arrays(); + auto& dmask_mf = *m_dirichlet_mask[amrlev][mglev]; + auto const& dmask_ar = dmask_mf.arrays(); + amrex::ParallelFor(dmask_mf, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + if (levset_ar[box_no](i,j,k) >= Real(0.0)) { + dmask_ar[box_no](i,j,k) = -1; + } + }); + } } } #endif @@ -279,16 +313,63 @@ MLEBNodeFDLaplacian::prepareForSolve () if (m_sigma[0] == 0._rt) { m_sigma[0] = 1._rt; // For backward compatibility } + AMREX_ASSERT(!m_has_sigma_mf); } #endif + + if (m_has_sigma_mf) { + AMREX_D_TERM(m_sigma[0] = Real(1.0);, + m_sigma[1] = Real(1.0);, + m_sigma[2] = Real(1.0)); + AMREX_ALWAYS_ASSERT(this->m_num_amr_levels == 1); + for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) { + for (int mglev = 1; mglev < this->m_num_mg_levels[amrlev]; ++mglev) { + m_sigma_mf[amrlev][mglev] = std::make_unique + (this->m_grids[amrlev][mglev], this->m_dmap[amrlev][mglev], 1, 1, + MFInfo{}, *(this->m_factory[amrlev][mglev])); + IntVect const ratio = (amrlev > 0) ? IntVect (2) + : this->mg_coarsen_ratio_vec[mglev-1]; +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (*m_sigma_mf[amrlev][mglev-1], + *m_sigma_mf[amrlev][mglev], 0, 1, ratio); + } + + for (int mglev = 0; mglev < this->m_num_mg_levels[amrlev]; ++mglev) { + auto const& geom = this->m_geom[amrlev][mglev]; + auto& sigma = *m_sigma_mf[amrlev][mglev]; + sigma.FillBoundary(geom.periodicity()); + + const Box& domain = geom.Domain(); + const auto lobc = LoBC(); + const auto hibc = HiBC(); + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) { mfi_info.SetDynamic(true); } +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(sigma, mfi_info); mfi.isValid(); ++mfi) + { + Array4 const& sfab = sigma.array(mfi); + mlndlap_fillbc_cc(mfi.validbox(),sfab,domain,lobc,hibc); + } + } + } + } } #ifdef AMREX_USE_EB void MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const { - auto const& dmask = *m_dirichlet_mask[amrlev][0]; const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); + if (!factory) { return; } + + auto const& dmask = *m_dirichlet_mask[amrlev][0]; auto const& edgecent = factory->getEdgeCent(); #ifdef AMREX_USE_OMP @@ -335,8 +416,10 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa #ifdef AMREX_USE_EB const auto phieb = (m_in_solution_mode) ? 
m_s_phi_eb : Real(0.0); const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - auto const& edgecent = factory->getEdgeCent(); - auto const& levset_mf = factory->getLevelSet(); + Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (factory) { + edgecent = factory->getEdgeCent(); + } #endif #ifdef AMREX_USE_OMP @@ -349,12 +432,12 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa Array4 const& yarr = out.array(mfi); Array4 const& dmarr = dmask.const_array(mfi); #ifdef AMREX_USE_EB - bool cutfab = edgecent[0]->ok(mfi); - if (cutfab) { + bool cutfab = edgecent[0] && edgecent[0]->ok(mfi); + if (cutfab && factory) { // clang-tidy is not that smart AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, Array4 const& ecy = edgecent[1]->const_array(mfi);, Array4 const& ecz = edgecent[2]->const_array(mfi)); - auto const& levset = levset_mf.const_array(mfi); + auto const& levset = factory->getLevelSet().const_array(mfi); if (phieb == std::numeric_limits::lowest()) { auto const& phiebarr = m_phi_eb[amrlev].const_array(mfi); #if (AMREX_SPACEDIM == 2) @@ -366,7 +449,15 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa }); } else #endif - { + if (m_has_sigma_mf) { + auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); + auto const& vfrc = factory->getVolFrac().const_array(mfi); + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_sig_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), + sigarr, vfrc, phiebarr, AMREX_D_DECL(bx,by,bz)); + }); + } else { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), @@ -383,7 +474,15 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa }); } else #endif - { + if (m_has_sigma_mf) { + auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); + auto const& vfrc = factory->getVolFrac().const_array(mfi); + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_sig_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), + sigarr, vfrc, phieb, AMREX_D_DECL(bx,by,bz)); + }); + } else { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_eb(i,j,k,yarr,xarr,levset,dmarr,AMREX_D_DECL(ecx,ecy,ecz), @@ -402,7 +501,13 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa }); } else #endif - { + if (m_has_sigma_mf) { + auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_sig_adotx(i,j,k,yarr,xarr,dmarr,sigarr,AMREX_D_DECL(bx,by,bz)); + }); + } else { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx(i,j,k,yarr,xarr,dmarr,AMREX_D_DECL(bx,by,bz)); @@ -438,8 +543,10 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF #ifdef AMREX_USE_EB const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - auto const& edgecent = factory->getEdgeCent(); - auto const& levset_mf = factory->getLevelSet(); + Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (factory) { + edgecent = factory->getEdgeCent(); + } #endif #ifdef AMREX_USE_OMP @@ -452,12 +559,12 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF Array4 const& rhsarr = rhs.const_array(mfi); Array4 const& dmskarr = dmask.const_array(mfi); #ifdef AMREX_USE_EB - bool cutfab = edgecent[0]->ok(mfi); - if (cutfab) { + bool cutfab = edgecent[0] && edgecent[0]->ok(mfi); + if (cutfab && factory) { // 
clang-tidy is not that smart AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, Array4 const& ecy = edgecent[1]->const_array(mfi);, Array4 const& ecz = edgecent[2]->const_array(mfi)); - auto const& levset = levset_mf.const_array(mfi); + auto const& levset = factory->getLevelSet().const_array(mfi); #if (AMREX_SPACEDIM == 2) if (m_rz) { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, @@ -467,7 +574,15 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF }); } else #endif - { + if (m_has_sigma_mf) { + auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); + auto const& vfrc = factory->getVolFrac().const_array(mfi); + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_sig_gsrb_eb(i,j,k,solarr,rhsarr,levset,dmskarr,AMREX_D_DECL(ecx,ecy,ecz), + sigarr, vfrc, AMREX_D_DECL(bx,by,bz), redblack); + }); + } else { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_eb(i,j,k,solarr,rhsarr,levset,dmskarr,AMREX_D_DECL(ecx,ecy,ecz), @@ -486,7 +601,14 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF }); } else #endif - { + if (m_has_sigma_mf) { + auto const& sigarr = m_sigma_mf[amrlev][mglev]->const_array(mfi); + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_sig_gsrb(i,j,k,solarr,rhsarr,dmskarr,sigarr, + AMREX_D_DECL(bx,by,bz), redblack); + }); + } else { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb(i,j,k,solarr,rhsarr,dmskarr, @@ -530,8 +652,10 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array auto const& dmask = *m_dirichlet_mask[amrlev][mglev]; const auto phieb = m_s_phi_eb; const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); - AMREX_ASSERT(factory); - auto const& edgecent = factory->getEdgeCent(); + Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (factory) { + edgecent = factory->getEdgeCent(); + } #endif #ifdef AMREX_USE_OMP @@ -548,7 +672,7 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array Array4 const& gpz = grad[2]->array(mfi);) #ifdef AMREX_USE_EB Array4 const& dmarr = dmask.const_array(mfi); - bool cutfab = edgecent[0]->ok(mfi); + bool cutfab = edgecent[0] && edgecent[0]->ok(mfi); AMREX_D_TERM(Array4 const& ecx = cutfab ? 
edgecent[0]->const_array(mfi) : Array4{};, Array4 const& ecy @@ -630,6 +754,7 @@ MLEBNodeFDLaplacian::postSolve (Vector& sol) const for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { const auto phieb = m_s_phi_eb; const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); + if (!factory) { return; } auto const& levset_mf = factory->getLevelSet(); auto const& levset_ar = levset_mf.const_arrays(); MultiFab& mf = sol[amrlev]; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index 03da0874e79..3cc623b761e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -376,6 +376,8 @@ public: virtual void solutionResidual (int amrlev, MF& resid, MF& x, const MF& b, const MF* crse_bcdata=nullptr) = 0; + virtual void prepareForFluxes (int /*amrlev*/, const MF* /*crse_bcdata*/ = nullptr) {} + /** * \brief Compute residual for the residual-correction form, resid = b - L(x) * diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 77f9abe409e..78b2ffdd3df 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -164,6 +164,8 @@ public: void setHypreStrongThreshold (Real t) noexcept {hypre_strong_threshold = t;} #endif + void prepareForFluxes (Vector const& a_sol); + template void prepareForSolve (Vector const& a_sol, Vector const& a_rhs); @@ -538,6 +540,16 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, return composite_norminf; } +template +void +MLMGT::prepareForFluxes (Vector const& a_sol) +{ + for (int alev = finest_amr_lev; alev >= 0; --alev) { + const MF* crse_bcdata = (alev > 0) ? a_sol[alev-1] : nullptr; + linop.prepareForFluxes(alev, crse_bcdata); + } +} + template template void @@ -971,7 +983,6 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& if (nGrowVect(*a_sol[alev]) == ng_sol) { if constexpr (std::is_same()) { sol[alev] = linop.makeAlias(*a_sol[alev]); - setBndry(sol[alev], RT(0.0), 0, ncomp); sol_is_alias[alev] = true; } } @@ -1558,29 +1569,32 @@ MLMGT::actualBottomSolve () } else { cg_type = MLCGSolverT::Type::BiCGStab; } + int ret = bottomSolveWithCG(x, *bottom_b, cg_type); - // If the MLMG solve failed then set the correction to zero - if (ret != 0 && ret != 9) { + + if (ret != 0 && (bottom_solver == BottomSolver::cgbicg || + bottom_solver == BottomSolver::bicgcg)) + { + if (bottom_solver == BottomSolver::cgbicg) { + cg_type = MLCGSolverT::Type::BiCGStab; // switch to bicg + } else { + cg_type = MLCGSolverT::Type::CG; // switch to cg + } setVal(cor[amrlev][mglev], RT(0.0)); - if (bottom_solver == BottomSolver::cgbicg || - bottom_solver == BottomSolver::bicgcg) { - if (bottom_solver == BottomSolver::cgbicg) { - cg_type = MLCGSolverT::Type::BiCGStab; // switch to bicg + ret = bottomSolveWithCG(x, *bottom_b, cg_type); + if (ret == 0) { // switch permanently + if (cg_type == MLCGSolverT::Type::CG) { + bottom_solver = BottomSolver::cg; } else { - cg_type = MLCGSolverT::Type::CG; // switch to cg - } - ret = bottomSolveWithCG(x, *bottom_b, cg_type); - if (ret != 0) { - setVal(cor[amrlev][mglev], RT(0.0)); - } else { // switch permanently - if (cg_type == MLCGSolverT::Type::CG) { - bottom_solver = BottomSolver::cg; - } else { - bottom_solver = BottomSolver::bicgstab; - } + bottom_solver = BottomSolver::bicgstab; } } } + + // If the bottom solve failed then set the correction to zero + if (ret != 0 && ret != 9) { + setVal(cor[amrlev][mglev], RT(0.0)); + } const int n = (ret==0) ? 
nub : nuf; for (int i = 0; i < n; ++i) { linop.smooth(amrlev, mglev, x, b); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H index 4f982e07752..91d02257396 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H @@ -194,7 +194,7 @@ void mlndlap_normalize_aa (int i, int j, int k, Array4 const& x, mlndlap_normalize_ha(i,j,k,x,sx,msk,dxinv); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_jacobi_ha (int i, int, int, Array4 const& sol, Real Ax, Array4 const& rhs, Array4 const& sx, Array4 const& msk, @@ -208,7 +208,7 @@ void mlndlap_jacobi_ha (int i, int, int, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_ha (Box const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Array4 const& sx, Array4 const& msk, @@ -216,7 +216,7 @@ void mlndlap_jacobi_ha (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& sol, Real Ax, Array4 const& rhs, Array4 const& sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -235,7 +235,7 @@ void mlndlap_jacobi_aa (int i, int j, int k, Array4 const& sol, Real Ax, mlndlap_jacobi_ha(i,j,k,sol,Ax,rhs,sig,msk,dxinv); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_aa (Box const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Array4 const& sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -243,7 +243,7 @@ void mlndlap_jacobi_aa (Box const& bx, Array4 const& sol, Array4 const& sol, Real Ax, Array4 const& rhs, Real sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -256,12 +256,12 @@ void mlndlap_jacobi_c (int i, int, int, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_c (Box const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Real sig, Array4 const& msk, GpuArray const& dxinv) noexcept { - amrex::LoopConcurrent(bx, [=] (int i, int, int) noexcept + amrex::LoopConcurrentOnCpu(bx, [&] (int i, int, int) noexcept { if (msk(i,0,0)) { sol(i,0,0) = Real(0.0); @@ -272,7 +272,7 @@ void mlndlap_jacobi_c (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& rhs, Array4 const& sx, @@ -281,7 +281,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4 const& sol, { Real fac = dxinv[0]*dxinv[0]; - amrex::Loop(bx, [=] (int i, int, int) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int, int) noexcept { if (msk(i,0,0)) { sol(i,0,0) = Real(0.0); @@ -295,7 +295,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, Array4 const& rhs, Array4 const& sx, @@ -305,7 +305,7 @@ void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, mlndlap_gauss_seidel_ha(bx,sol,rhs,sx,msk,dxinv); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, Array4 const& rhs, Real sig, Array4 const& msk, @@ -313,7 +313,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, { Real fac = dxinv[0]*dxinv[0]; - amrex::Loop(bx, [=] (int i, int, int) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int, int) noexcept { if (msk(i,0,0)) { sol(i,0,0) = Real(0.0); @@ -327,7 +327,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_with_line_solve_aa(Box 
const&, Array4 const&, Array4 const&, Array4 const&, Array4 const&, GpuArray const&) noexcept @@ -556,7 +556,7 @@ Real mlndlap_adotx_sten (int /*i*/, int /*j*/, int /*k*/, Array4 con Array4 const&, Array4 const&) noexcept { return Real(0.0); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_sten (Box const&, Array4 const&, Array4 const&, Array4 const&, @@ -575,6 +575,72 @@ void mlndlap_restriction_rap (int /*i*/, int /*j*/, int /*k*/, Array4 cons Array4 const&) noexcept {} +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +int mlndlap_color (int i, int, int) +{ + return i%2; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_ha (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, + Array4 const& sx, + Array4 const& msk, + GpuArray const& dxinv, int color) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,0,0)) { + sol(i,0,0) = Real(0.0); + } else { + Real fac = dxinv[0]*dxinv[0]; + + Real s0 = Real(-1.0) * fac * (sx(i-1,0,0)+sx(i,0,0)); + Real Ax = sol(i-1,0,0)*fac*sx(i-1,0,0) + + sol(i+1,0,0)*fac*sx(i ,0,0) + + sol(i ,0,0)*s0; + sol(i,0,0) += (rhs(i,0,0) - Ax) / s0; + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_aa (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, + Array4 const& sx, + Array4 const& msk, + GpuArray const& dxinv, int color) noexcept +{ + mlndlap_gscolor_ha(i,j,k,sol,rhs,sx,msk,dxinv,color); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_c (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Real sig, + Array4 const& msk, + GpuArray const& dxinv, int color) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,0,0)) { + sol(i,0,0) = Real(0.0); + } else { + Real fac = dxinv[0]*dxinv[0]; + + Real s0 = Real(-2.0) * fac * sig; + Real Ax = sol(i-1,0,0)*fac*sig + + sol(i+1,0,0)*fac*sig + + sol(i ,0,0)*s0; + sol(i,0,0) += (rhs(i,0,0) - Ax) / s0; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_sten (int, int, int, Array4 const&, + Array4 const&, + Array4 const&, + Array4 const&, int) noexcept +{} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H index 372215f5d73..05f02aaa927 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H @@ -418,7 +418,7 @@ void mlndlap_normalize_aa (int i, int j, int k, Array4 const& x, Array4 const& sol, Real Ax, Array4 const& rhs, Array4 const& sx, Array4 const& sy, Array4 const& msk, @@ -436,7 +436,7 @@ void mlndlap_jacobi_ha (int i, int j, int k, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_ha (Box const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Array4 const& sx, Array4 const& sy, Array4 const& msk, @@ -445,7 +445,7 @@ void mlndlap_jacobi_ha (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& sol, Real Ax, Array4 const& rhs, Array4 const& sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -472,7 +472,7 @@ void mlndlap_jacobi_aa (int i, int j, int k, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_jacobi_c (int i, int j, int k, Array4 const& sol, Real Ax, Array4 const& rhs, Real sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -487,14 +487,14 @@ void mlndlap_jacobi_c (int i, int j, int k, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_aa (Box 
const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Array4 const& sig, Array4 const& msk, GpuArray const& dxinv) noexcept { Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]); - amrex::LoopConcurrent(bx, [=] (int i, int j, int k) noexcept + amrex::LoopConcurrentOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -505,14 +505,14 @@ void mlndlap_jacobi_aa (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Real sig, Array4 const& msk, GpuArray const& dxinv) noexcept { Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]); - amrex::LoopConcurrent(bx, [=] (int i, int j, int k) noexcept + amrex::LoopConcurrentOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -523,7 +523,7 @@ void mlndlap_jacobi_c (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& rhs, Array4 const& sx, Array4 const& sy, Array4 const& msk, @@ -533,7 +533,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4 const& sol, Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; - amrex::Loop(bx, [=] (int i, int j, int k) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -570,7 +570,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, Array4 const& rhs, Array4 const& sig, Array4 const& msk, @@ -583,7 +583,7 @@ void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, Real f2xmy = Real(2.0)*facx - facy; Real fmx2y = Real(2.0)*facy - facx; - amrex::Loop(bx, [=] (int i, int j, int k) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -614,7 +614,7 @@ void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, Array4 const& rhs, Real sig, Array4 const& msk, @@ -627,7 +627,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, Real f2xmy = Real(2.0)*facx - facy; Real fmx2y = Real(2.0)*facy - facx; - amrex::Loop(bx, [=] (int i, int j, int k) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -658,7 +658,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +AMREX_FORCE_INLINE void tridiagonal_solve (Array1D& a_ls, Array1D& b_ls, Array1D& c_ls, Array1D& r_ls, Array1D& u_ls, Array1D& gam, int ilen ) noexcept @@ -677,7 +677,7 @@ void tridiagonal_solve (Array1D& a_ls, Array1D& b_ls, Arra } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_with_line_solve_aa (Box const& bx, Array4 const& sol, Array4 const& rhs, Array4 const& sig, Array4 const& msk, @@ -1819,6 +1819,21 @@ void mlndlap_stencil_rap (int i, int j, int, Array4 const& csten, csten(i,j,k,3) = Real(0.5)*(cross1+cross2); } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlndlap_adotx_sten_doit (int i, int j, int k, Array4 const& x, + Array4 const& sten) noexcept +{ + return x(i-1,j-1,k)*sten(i-1,j-1,k,3) + + x(i ,j-1,k)*sten(i ,j-1,k,2) + + x(i+1,j-1,k)*sten(i ,j-1,k,3) + + x(i-1,j ,k)*sten(i-1,j ,k,1) + + x(i ,j ,k)*sten(i ,j ,k,0) + + x(i+1,j ,k)*sten(i ,j ,k,1) + + x(i-1,j+1,k)*sten(i-1,j ,k,3) + + x(i ,j+1,k)*sten(i ,j 
,k,2) + + x(i+1,j+1,k)*sten(i ,j ,k,3); +} + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_adotx_sten (int i, int j, int k, Array4 const& x, Array4 const& sten, Array4 const& msk) noexcept @@ -1826,40 +1841,33 @@ Real mlndlap_adotx_sten (int i, int j, int k, Array4 const& x, if (msk(i,j,k)) { return Real(0.0); } else { - return x(i-1,j-1,k)*sten(i-1,j-1,k,3) - + x(i ,j-1,k)*sten(i ,j-1,k,2) - + x(i+1,j-1,k)*sten(i ,j-1,k,3) - + x(i-1,j ,k)*sten(i-1,j ,k,1) - + x(i ,j ,k)*sten(i ,j ,k,0) - + x(i+1,j ,k)*sten(i ,j ,k,1) - + x(i-1,j+1,k)*sten(i-1,j ,k,3) - + x(i ,j+1,k)*sten(i ,j ,k,2) - + x(i+1,j+1,k)*sten(i ,j ,k,3); + return mlndlap_adotx_sten_doit(i,j,k,x,sten); } } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlndlap_gauss_seidel_sten (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, + Array4 const& sten, + Array4 const& msk) noexcept +{ + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else if (sten(i,j,k,0) != Real(0.0)) { + Real Ax = mlndlap_adotx_sten_doit(i,j,k,sol,sten); + sol(i,j,k) += (rhs(i,j,k) - Ax) / sten(i,j,k,0); + } +} + +inline void mlndlap_gauss_seidel_sten (Box const& bx, Array4 const& sol, Array4 const& rhs, Array4 const& sten, Array4 const& msk) noexcept { - amrex::LoopConcurrent(bx, [=] (int i, int j, int k) noexcept + AMREX_LOOP_3D(bx, i, j, k, { - if (msk(i,j,k)) { - sol(i,j,k) = Real(0.0); - } else if (sten(i,j,k,0) != Real(0.0)) { - Real Ax = sol(i-1,j-1,k)*sten(i-1,j-1,k,3) - + sol(i ,j-1,k)*sten(i ,j-1,k,2) - + sol(i+1,j-1,k)*sten(i ,j-1,k,3) - + sol(i-1,j ,k)*sten(i-1,j ,k,1) - + sol(i ,j ,k)*sten(i ,j ,k,0) - + sol(i+1,j ,k)*sten(i ,j ,k,1) - + sol(i-1,j+1,k)*sten(i-1,j ,k,3) - + sol(i ,j+1,k)*sten(i ,j ,k,2) - + sol(i+1,j+1,k)*sten(i ,j ,k,3); - sol(i,j,k) += (rhs(i,j,k) - Ax) / sten(i,j,k,0); - } + mlndlap_gauss_seidel_sten(i,j,k,sol,rhs,sten,msk); }); } @@ -3536,5 +3544,154 @@ void mlndlap_fillijmat_cs_gpu (const int ps, const int i, const int j, const int #endif +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +int mlndlap_color (int i, int j, int) +{ + return (i%2) + (j%2)*2; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_ha (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Array4 const& sx, + Array4 const& sy, Array4 const& msk, + GpuArray const& dxinv, int color, + bool is_rz) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; + + Real s0 = Real(-2.0)*(facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k)) + +facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k))); + + Real Ax = sol(i-1,j-1,k)*(facx*sx(i-1,j-1,k)+facy*sy(i-1,j-1,k)) + + sol(i+1,j-1,k)*(facx*sx(i ,j-1,k)+facy*sy(i ,j-1,k)) + + sol(i-1,j+1,k)*(facx*sx(i-1,j ,k)+facy*sy(i-1,j ,k)) + + sol(i+1,j+1,k)*(facx*sx(i ,j ,k)+facy*sy(i ,j ,k)) + + sol(i-1,j,k)*(Real(2.0)*facx*(sx(i-1,j-1,k)+sx(i-1,j,k)) + - facy*(sy(i-1,j-1,k)+sy(i-1,j,k))) + + sol(i+1,j,k)*(Real(2.0)*facx*(sx(i ,j-1,k)+sx(i ,j,k)) + - facy*(sy(i ,j-1,k)+sy(i ,j,k))) + + sol(i,j-1,k)*( -facx*(sx(i-1,j-1,k)+sx(i,j-1,k)) + +Real(2.0)*facy*(sy(i-1,j-1,k)+sy(i,j-1,k))) + + sol(i,j+1,k)*( -facx*(sx(i-1,j ,k)+sx(i,j ,k)) + +Real(2.0)*facy*(sy(i-1,j ,k)+sy(i,j ,k))) + + sol(i,j,k)*s0; + + if (is_rz) { + Real fp = facy / static_cast(2*i+1); + Real fm = facy / static_cast(2*i-1); + Real frzlo = fm*sy(i-1,j-1,k)-fp*sy(i,j-1,k); + Real frzhi = fm*sy(i-1,j ,k)-fp*sy(i,j ,k); + s0 += - frzhi - frzlo; + Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k)) + + 
frzlo*(sol(i,j-1,k)-sol(i,j,k)); + } + + sol(i,j,k) += (rhs(i,j,k) - Ax) / s0; + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_aa (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Array4 const& sig, + Array4 const& msk, + GpuArray const& dxinv, int color, + bool is_rz) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; + Real fxy = facx + facy; + Real f2xmy = Real(2.0)*facx - facy; + Real fmx2y = Real(2.0)*facy - facx; + + Real s0 = (-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k)); + Real Ax = sol(i-1,j-1,k)*fxy*sig(i-1,j-1,k) + + sol(i+1,j-1,k)*fxy*sig(i ,j-1,k) + + sol(i-1,j+1,k)*fxy*sig(i-1,j ,k) + + sol(i+1,j+1,k)*fxy*sig(i ,j ,k) + + sol(i-1,j,k)*f2xmy*(sig(i-1,j-1,k)+sig(i-1,j,k)) + + sol(i+1,j,k)*f2xmy*(sig(i ,j-1,k)+sig(i ,j,k)) + + sol(i,j-1,k)*fmx2y*(sig(i-1,j-1,k)+sig(i,j-1,k)) + + sol(i,j+1,k)*fmx2y*(sig(i-1,j ,k)+sig(i,j ,k)) + + sol(i,j,k)*s0; + + if (is_rz) { + Real fp = facy / static_cast(2*i+1); + Real fm = facy / static_cast(2*i-1); + Real frzlo = fm*sig(i-1,j-1,k)-fp*sig(i,j-1,k); + Real frzhi = fm*sig(i-1,j ,k)-fp*sig(i,j ,k); + s0 += - frzhi - frzlo; + Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k)) + + frzlo*(sol(i,j-1,k)-sol(i,j,k)); + } + + sol(i,j,k) += (rhs(i,j,k) - Ax) / s0; + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_c (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Real sig, + Array4 const& msk, + GpuArray const& dxinv, int color, + bool is_rz) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; + Real fxy = facx + facy; + Real f2xmy = Real(2.0)*facx - facy; + Real fmx2y = Real(2.0)*facy - facx; + + Real s0 = (-Real(2.0))*fxy*Real(4.); + Real Ax = sol(i-1,j-1,k)*fxy + + sol(i+1,j-1,k)*fxy + + sol(i-1,j+1,k)*fxy + + sol(i+1,j+1,k)*fxy + + sol(i-1,j,k)*f2xmy*Real(2.) + + sol(i+1,j,k)*f2xmy*Real(2.) + + sol(i,j-1,k)*fmx2y*Real(2.) + + sol(i,j+1,k)*fmx2y*Real(2.) 
+ + sol(i,j,k)*s0; + + if (is_rz) { + Real fp = facy / static_cast(2*i+1); + Real fm = facy / static_cast(2*i-1); + Real frzlo = fm-fp; + Real frzhi = fm-fp; + s0 += - frzhi - frzlo; + Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k)) + + frzlo*(sol(i,j-1,k)-sol(i,j,k)); + } + + sol(i,j,k) += (rhs(i,j,k) - Ax*sig) / (s0*sig); + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_sten (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, + Array4 const& sten, + Array4 const& msk, int color) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + mlndlap_gauss_seidel_sten(i,j,k,sol,rhs,sten,msk); + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H index f6b94e7c526..5d31de02711 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H @@ -988,7 +988,7 @@ void mlndlap_normalize_aa (int i, int j, int k, Array4 const& x, Array4 const& sol, Real Ax, Array4 const& rhs, Array4 const& sx, Array4 const& sy, Array4 const& sz, @@ -1011,7 +1011,7 @@ void mlndlap_jacobi_ha (int i, int j, int k, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_ha (Box const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Array4 const& sx, Array4 const& sy, Array4 const& sz, @@ -1021,7 +1021,7 @@ void mlndlap_jacobi_ha (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& sol, Real Ax, Array4 const& rhs, Array4 const& sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -1055,7 +1055,7 @@ void mlndlap_jacobi_aa (int i, int j, int k, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_jacobi_c (int i, int j, int k, Array4 const& sol, Real Ax, Array4 const& rhs, Real sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -1072,7 +1072,7 @@ void mlndlap_jacobi_c (int i, int j, int k, Array4 const& sol, Real Ax, } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_jacobi_aa (Box const& bx, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Array4 const& sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -1081,7 +1081,7 @@ void mlndlap_jacobi_aa (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& sol, Array4 const& Ax, Array4 const& rhs, Real sig, Array4 const& msk, GpuArray const& dxinv) noexcept @@ -1102,7 +1102,7 @@ void mlndlap_jacobi_c (Box const& bx, Array4 const& sol, Array4 const& sol, Array4 const& sol, Array4 const& rhs, Array4 const& sx, Array4 const& sy, Array4 const& sz, @@ -1124,7 +1124,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4 const& sol, Real facy = Real(1.0/36.0)*dxinv[1]*dxinv[1]; Real facz = Real(1.0/36.0)*dxinv[2]*dxinv[2]; - amrex::Loop(bx, [=] (int i, int j, int k) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -1220,7 +1220,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, Array4 const& rhs, Array4 const& sig, Array4 const& msk, @@ -1237,7 +1237,7 @@ void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, Real fm2x4ym2z = -Real(2.0)*facx + Real(4.0)*facy - Real(2.0)*facz; Real fm2xm2y4z = -Real(2.0)*facx - Real(2.0)*facy + Real(4.0)*facz; - amrex::Loop(bx, [=] (int i, int j, int k) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int j, int k) 
noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -1277,7 +1277,7 @@ void mlndlap_gauss_seidel_aa (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, Array4 const& rhs, Real sig, Array4 const& msk, @@ -1294,7 +1294,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, Real fm2x4ym2z = -Real(2.0)*facx + Real(4.0)*facy - Real(2.0)*facz; Real fm2xm2y4z = -Real(2.0)*facx - Real(2.0)*facy + Real(4.0)*facz; - amrex::Loop(bx, [=] (int i, int j, int k) noexcept + amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept { if (msk(i,j,k)) { sol(i,j,k) = Real(0.0); @@ -1333,7 +1333,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4 const& sol, }); } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +AMREX_FORCE_INLINE void tridiagonal_solve (Array1D& a_ls, Array1D& b_ls, Array1D& c_ls, Array1D& r_ls, Array1D& u_ls, Array1D& gam, int ilen ) noexcept @@ -1352,7 +1352,7 @@ void tridiagonal_solve (Array1D& a_ls, Array1D& b_ls, Arra } } -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +inline void mlndlap_gauss_seidel_with_line_solve_aa (Box const& bx, Array4 const& sol, Array4 const& rhs, Array4 const& sig, Array4 const& msk, @@ -5460,101 +5460,83 @@ void mlndlap_stencil_rap (int i, int j, int k, Array4 const& csten, } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -Real mlndlap_adotx_sten (int i, int j, int k, Array4 const& x, - Array4 const& sten, Array4 const& msk) noexcept +Real mlndlap_adotx_sten_doit (int i, int j, int k, Array4 const& x, + Array4 const& sten) noexcept { using namespace nodelap_detail; + return x(i ,j ,k ) * sten(i ,j ,k ,ist_000) + // + + x(i-1,j ,k ) * sten(i-1,j ,k ,ist_p00) + + x(i+1,j ,k ) * sten(i ,j ,k ,ist_p00) + // + + x(i ,j-1,k ) * sten(i ,j-1,k ,ist_0p0) + + x(i ,j+1,k ) * sten(i ,j ,k ,ist_0p0) + // + + x(i ,j ,k-1) * sten(i ,j ,k-1,ist_00p) + + x(i ,j ,k+1) * sten(i ,j ,k ,ist_00p) + // + + x(i-1,j-1,k ) * sten(i-1,j-1,k ,ist_pp0) + + x(i+1,j-1,k ) * sten(i ,j-1,k ,ist_pp0) + + x(i-1,j+1,k ) * sten(i-1,j ,k ,ist_pp0) + + x(i+1,j+1,k ) * sten(i ,j ,k ,ist_pp0) + // + + x(i-1,j ,k-1) * sten(i-1,j ,k-1,ist_p0p) + + x(i+1,j ,k-1) * sten(i ,j ,k-1,ist_p0p) + + x(i-1,j ,k+1) * sten(i-1,j ,k ,ist_p0p) + + x(i+1,j ,k+1) * sten(i ,j ,k ,ist_p0p) + // + + x(i ,j-1,k-1) * sten(i ,j-1,k-1,ist_0pp) + + x(i ,j+1,k-1) * sten(i ,j ,k-1,ist_0pp) + + x(i ,j-1,k+1) * sten(i ,j-1,k ,ist_0pp) + + x(i ,j+1,k+1) * sten(i ,j ,k ,ist_0pp) + // + + x(i-1,j-1,k-1) * sten(i-1,j-1,k-1,ist_ppp) + + x(i+1,j-1,k-1) * sten(i ,j-1,k-1,ist_ppp) + + x(i-1,j+1,k-1) * sten(i-1,j ,k-1,ist_ppp) + + x(i+1,j+1,k-1) * sten(i ,j ,k-1,ist_ppp) + + x(i-1,j-1,k+1) * sten(i-1,j-1,k ,ist_ppp) + + x(i+1,j-1,k+1) * sten(i ,j-1,k ,ist_ppp) + + x(i-1,j+1,k+1) * sten(i-1,j ,k ,ist_ppp) + + x(i+1,j+1,k+1) * sten(i ,j ,k ,ist_ppp); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlndlap_adotx_sten (int i, int j, int k, Array4 const& x, + Array4 const& sten, Array4 const& msk) noexcept +{ if (msk(i,j,k)) { return Real(0.0); } else { - return x(i ,j ,k ) * sten(i ,j ,k ,ist_000) - // - + x(i-1,j ,k ) * sten(i-1,j ,k ,ist_p00) - + x(i+1,j ,k ) * sten(i ,j ,k ,ist_p00) - // - + x(i ,j-1,k ) * sten(i ,j-1,k ,ist_0p0) - + x(i ,j+1,k ) * sten(i ,j ,k ,ist_0p0) - // - + x(i ,j ,k-1) * sten(i ,j ,k-1,ist_00p) - + x(i ,j ,k+1) * sten(i ,j ,k ,ist_00p) - // - + x(i-1,j-1,k ) * sten(i-1,j-1,k ,ist_pp0) - + x(i+1,j-1,k ) * sten(i ,j-1,k ,ist_pp0) - + x(i-1,j+1,k ) * sten(i-1,j ,k ,ist_pp0) - + x(i+1,j+1,k ) * sten(i ,j ,k 
,ist_pp0) - // - + x(i-1,j ,k-1) * sten(i-1,j ,k-1,ist_p0p) - + x(i+1,j ,k-1) * sten(i ,j ,k-1,ist_p0p) - + x(i-1,j ,k+1) * sten(i-1,j ,k ,ist_p0p) - + x(i+1,j ,k+1) * sten(i ,j ,k ,ist_p0p) - // - + x(i ,j-1,k-1) * sten(i ,j-1,k-1,ist_0pp) - + x(i ,j+1,k-1) * sten(i ,j ,k-1,ist_0pp) - + x(i ,j-1,k+1) * sten(i ,j-1,k ,ist_0pp) - + x(i ,j+1,k+1) * sten(i ,j ,k ,ist_0pp) - // - + x(i-1,j-1,k-1) * sten(i-1,j-1,k-1,ist_ppp) - + x(i+1,j-1,k-1) * sten(i ,j-1,k-1,ist_ppp) - + x(i-1,j+1,k-1) * sten(i-1,j ,k-1,ist_ppp) - + x(i+1,j+1,k-1) * sten(i ,j ,k-1,ist_ppp) - + x(i-1,j-1,k+1) * sten(i-1,j-1,k ,ist_ppp) - + x(i+1,j-1,k+1) * sten(i ,j-1,k ,ist_ppp) - + x(i-1,j+1,k+1) * sten(i-1,j ,k ,ist_ppp) - + x(i+1,j+1,k+1) * sten(i ,j ,k ,ist_ppp); + return mlndlap_adotx_sten_doit(i,j,k,x,sten); } } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mlndlap_gauss_seidel_sten (Box const& bx, Array4 const& sol, +void mlndlap_gauss_seidel_sten (int i, int j, int k, Array4 const& sol, Array4 const& rhs, Array4 const& sten, Array4 const& msk) noexcept { using namespace nodelap_detail; - amrex::LoopConcurrent(bx, [=] (int i, int j, int k) noexcept + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else if (sten(i,j,k,ist_000) != Real(0.0)) { + Real Ax = mlndlap_adotx_sten_doit(i,j,k,sol,sten); + sol(i,j,k) += (rhs(i,j,k) - Ax) / sten(i,j,k,ist_000); + } +} + +inline +void mlndlap_gauss_seidel_sten (Box const& bx, Array4 const& sol, + Array4 const& rhs, + Array4 const& sten, + Array4 const& msk) noexcept +{ + AMREX_LOOP_3D(bx, i, j, k, { - if (msk(i,j,k)) { - sol(i,j,k) = Real(0.0); - } else if (sten(i,j,k,ist_000) != Real(0.0)) { - Real Ax = sol(i ,j ,k ) * sten(i ,j ,k ,ist_000) - // - + sol(i-1,j ,k ) * sten(i-1,j ,k ,ist_p00) - + sol(i+1,j ,k ) * sten(i ,j ,k ,ist_p00) - // - + sol(i ,j-1,k ) * sten(i ,j-1,k ,ist_0p0) - + sol(i ,j+1,k ) * sten(i ,j ,k ,ist_0p0) - // - + sol(i ,j ,k-1) * sten(i ,j ,k-1,ist_00p) - + sol(i ,j ,k+1) * sten(i ,j ,k ,ist_00p) - // - + sol(i-1,j-1,k ) * sten(i-1,j-1,k ,ist_pp0) - + sol(i+1,j-1,k ) * sten(i ,j-1,k ,ist_pp0) - + sol(i-1,j+1,k ) * sten(i-1,j ,k ,ist_pp0) - + sol(i+1,j+1,k ) * sten(i ,j ,k ,ist_pp0) - // - + sol(i-1,j ,k-1) * sten(i-1,j ,k-1,ist_p0p) - + sol(i+1,j ,k-1) * sten(i ,j ,k-1,ist_p0p) - + sol(i-1,j ,k+1) * sten(i-1,j ,k ,ist_p0p) - + sol(i+1,j ,k+1) * sten(i ,j ,k ,ist_p0p) - // - + sol(i ,j-1,k-1) * sten(i ,j-1,k-1,ist_0pp) - + sol(i ,j+1,k-1) * sten(i ,j ,k-1,ist_0pp) - + sol(i ,j-1,k+1) * sten(i ,j-1,k ,ist_0pp) - + sol(i ,j+1,k+1) * sten(i ,j ,k ,ist_0pp) - // - + sol(i-1,j-1,k-1) * sten(i-1,j-1,k-1,ist_ppp) - + sol(i+1,j-1,k-1) * sten(i ,j-1,k-1,ist_ppp) - + sol(i-1,j+1,k-1) * sten(i-1,j ,k-1,ist_ppp) - + sol(i+1,j+1,k-1) * sten(i ,j ,k-1,ist_ppp) - + sol(i-1,j-1,k+1) * sten(i-1,j-1,k ,ist_ppp) - + sol(i+1,j-1,k+1) * sten(i ,j-1,k ,ist_ppp) - + sol(i-1,j+1,k+1) * sten(i-1,j ,k ,ist_ppp) - + sol(i+1,j+1,k+1) * sten(i ,j ,k ,ist_ppp); - - sol(i,j,k) += (rhs(i,j,k) - Ax) / sten(i,j,k,ist_000); - } + mlndlap_gauss_seidel_sten(i,j,k,sol,rhs,sten,msk); }); } @@ -10887,5 +10869,239 @@ void mlndlap_fillijmat_cs_gpu (const int ps, const int i, const int j, const int #endif +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +int mlndlap_color (int i, int j, int k) +{ + return (i%2) + (j%2)*2 + (k%2)*4; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_ha (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Array4 const& sx, + Array4 const& sy, Array4 const& sz, + Array4 const& msk, + GpuArray const& dxinv, int color) noexcept +{ + if (mlndlap_color(i,j,k) == 
color) { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real facx = Real(1.0/36.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/36.0)*dxinv[1]*dxinv[1]; + Real facz = Real(1.0/36.0)*dxinv[2]*dxinv[2]; + + Real s0 = Real(-4.0)*(facx*(sx(i-1,j-1,k-1)+sx(i,j-1,k-1)+sx(i-1,j,k-1)+sx(i,j,k-1) + +sx(i-1,j-1,k )+sx(i,j-1,k )+sx(i-1,j,k )+sx(i,j,k )) + +facy*(sy(i-1,j-1,k-1)+sy(i,j-1,k-1)+sy(i-1,j,k-1)+sy(i,j,k-1) + +sy(i-1,j-1,k )+sy(i,j-1,k )+sy(i-1,j,k )+sy(i,j,k )) + +facz*(sz(i-1,j-1,k-1)+sz(i,j-1,k-1)+sz(i-1,j,k-1)+sz(i,j,k-1) + +sz(i-1,j-1,k )+sz(i,j-1,k )+sz(i-1,j,k )+sz(i,j,k ))); + Real Ax = sol(i,j,k)*s0 + + sol(i-1,j-1,k-1)*(facx*sx(i-1,j-1,k-1) + +facy*sy(i-1,j-1,k-1) + +facz*sz(i-1,j-1,k-1)) + + sol(i+1,j-1,k-1)*(facx*sx(i ,j-1,k-1) + +facy*sy(i ,j-1,k-1) + +facz*sz(i ,j-1,k-1)) + + sol(i-1,j+1,k-1)*(facx*sx(i-1,j ,k-1) + +facy*sy(i-1,j ,k-1) + +facz*sz(i-1,j ,k-1)) + + sol(i+1,j+1,k-1)*(facx*sx(i ,j ,k-1) + +facy*sy(i ,j ,k-1) + +facz*sz(i ,j ,k-1)) + + sol(i-1,j-1,k+1)*(facx*sx(i-1,j-1,k ) + +facy*sy(i-1,j-1,k ) + +facz*sz(i-1,j-1,k )) + + sol(i+1,j-1,k+1)*(facx*sx(i ,j-1,k ) + +facy*sy(i ,j-1,k ) + +facz*sz(i ,j-1,k )) + + sol(i-1,j+1,k+1)*(facx*sx(i-1,j ,k ) + +facy*sy(i-1,j ,k ) + +facz*sz(i-1,j ,k )) + + sol(i+1,j+1,k+1)*(facx*sx(i ,j ,k ) + +facy*sy(i ,j ,k ) + +facz*sz(i ,j ,k )) + +sol(i ,j-1,k-1)*( -facx*(sx(i-1,j-1,k-1)+sx(i,j-1,k-1)) + +Real(2.0)*facy*(sy(i-1,j-1,k-1)+sy(i,j-1,k-1)) + +Real(2.0)*facz*(sz(i-1,j-1,k-1)+sz(i,j-1,k-1))) + +sol(i ,j+1,k-1)*( -facx*(sx(i-1,j ,k-1)+sx(i,j ,k-1)) + +Real(2.0)*facy*(sy(i-1,j ,k-1)+sy(i,j ,k-1)) + +Real(2.0)*facz*(sz(i-1,j ,k-1)+sz(i,j ,k-1))) + +sol(i ,j-1,k+1)*( -facx*(sx(i-1,j-1,k )+sx(i,j-1,k )) + +Real(2.0)*facy*(sy(i-1,j-1,k )+sy(i,j-1,k )) + +Real(2.0)*facz*(sz(i-1,j-1,k )+sz(i,j-1,k ))) + +sol(i ,j+1,k+1)*( -facx*(sx(i-1,j ,k )+sx(i,j ,k )) + +Real(2.0)*facy*(sy(i-1,j ,k )+sy(i,j ,k )) + +Real(2.0)*facz*(sz(i-1,j ,k )+sz(i,j ,k ))) + +sol(i-1,j ,k-1)*( Real(2.0)*facx*(sx(i-1,j-1,k-1)+sx(i-1,j,k-1)) + -facy*(sy(i-1,j-1,k-1)+sy(i-1,j,k-1)) + +Real(2.0)*facz*(sz(i-1,j-1,k-1)+sz(i-1,j,k-1))) + +sol(i+1,j ,k-1)*( Real(2.0)*facx*(sx(i ,j-1,k-1)+sx(i ,j,k-1)) + -facy*(sy(i ,j-1,k-1)+sy(i ,j,k-1)) + +Real(2.0)*facz*(sz(i ,j-1,k-1)+sz(i ,j,k-1))) + +sol(i-1,j ,k+1)*( Real(2.0)*facx*(sx(i-1,j-1,k )+sx(i-1,j,k )) + -facy*(sy(i-1,j-1,k )+sy(i-1,j,k )) + +Real(2.0)*facz*(sz(i-1,j-1,k )+sz(i-1,j,k ))) + +sol(i+1,j ,k+1)*( Real(2.0)*facx*(sx(i ,j-1,k )+sx(i ,j,k )) + -facy*(sy(i ,j-1,k )+sy(i ,j,k )) + +Real(2.0)*facz*(sz(i ,j-1,k )+sz(i ,j,k ))) + +sol(i-1,j-1,k )*( Real(2.0)*facx*(sx(i-1,j-1,k-1)+sx(i-1,j-1,k)) + +Real(2.0)*facy*(sy(i-1,j-1,k-1)+sy(i-1,j-1,k)) + -facz*(sz(i-1,j-1,k-1)+sz(i-1,j-1,k))) + +sol(i+1,j-1,k )*( Real(2.0)*facx*(sx(i ,j-1,k-1)+sx(i ,j-1,k)) + +Real(2.0)*facy*(sy(i ,j-1,k-1)+sy(i ,j-1,k)) + -facz*(sz(i ,j-1,k-1)+sz(i ,j-1,k))) + +sol(i-1,j+1,k )*( Real(2.0)*facx*(sx(i-1,j ,k-1)+sx(i-1,j ,k)) + +Real(2.0)*facy*(sy(i-1,j ,k-1)+sy(i-1,j ,k)) + -facz*(sz(i-1,j ,k-1)+sz(i-1,j ,k))) + +sol(i+1,j+1,k )*( Real(2.0)*facx*(sx(i ,j ,k-1)+sx(i ,j ,k)) + +Real(2.0)*facy*(sy(i ,j ,k-1)+sy(i ,j ,k)) + -facz*(sz(i ,j ,k-1)+sz(i ,j ,k))) + + Real(2.0)*sol(i-1,j,k)*(Real(2.0)*facx*(sx(i-1,j-1,k-1)+sx(i-1,j,k-1)+sx(i-1,j-1,k)+sx(i-1,j,k)) + -facy*(sy(i-1,j-1,k-1)+sy(i-1,j,k-1)+sy(i-1,j-1,k)+sy(i-1,j,k)) + -facz*(sz(i-1,j-1,k-1)+sz(i-1,j,k-1)+sz(i-1,j-1,k)+sz(i-1,j,k))) + + Real(2.0)*sol(i+1,j,k)*(Real(2.0)*facx*(sx(i ,j-1,k-1)+sx(i ,j,k-1)+sx(i ,j-1,k)+sx(i ,j,k)) + -facy*(sy(i ,j-1,k-1)+sy(i ,j,k-1)+sy(i ,j-1,k)+sy(i 
,j,k)) + -facz*(sz(i ,j-1,k-1)+sz(i ,j,k-1)+sz(i ,j-1,k)+sz(i ,j,k))) + + Real(2.0)*sol(i,j-1,k)*( -facx*(sx(i-1,j-1,k-1)+sx(i,j-1,k-1)+sx(i-1,j-1,k)+sx(i,j-1,k)) + +Real(2.0)*facy*(sy(i-1,j-1,k-1)+sy(i,j-1,k-1)+sy(i-1,j-1,k)+sy(i,j-1,k)) + -facz*(sz(i-1,j-1,k-1)+sz(i,j-1,k-1)+sz(i-1,j-1,k)+sz(i,j-1,k))) + + Real(2.0)*sol(i,j+1,k)*( -facx*(sx(i-1,j ,k-1)+sx(i,j ,k-1)+sx(i-1,j ,k)+sx(i,j ,k)) + +Real(2.0)*facy*(sy(i-1,j ,k-1)+sy(i,j ,k-1)+sy(i-1,j ,k)+sy(i,j ,k)) + -facz*(sz(i-1,j ,k-1)+sz(i,j ,k-1)+sz(i-1,j ,k)+sz(i,j ,k))) + + Real(2.0)*sol(i,j,k-1)*( -facx*(sx(i-1,j-1,k-1)+sx(i,j-1,k-1)+sx(i-1,j,k-1)+sx(i,j,k-1)) + -facy*(sy(i-1,j-1,k-1)+sy(i,j-1,k-1)+sy(i-1,j,k-1)+sy(i,j,k-1)) + +Real(2.0)*facz*(sz(i-1,j-1,k-1)+sz(i,j-1,k-1)+sz(i-1,j,k-1)+sz(i,j,k-1))) + + Real(2.0)*sol(i,j,k+1)*( -facx*(sx(i-1,j-1,k )+sx(i,j-1,k )+sx(i-1,j,k )+sx(i,j,k )) + -facy*(sy(i-1,j-1,k )+sy(i,j-1,k )+sy(i-1,j,k )+sy(i,j,k )) + +Real(2.0)*facz*(sz(i-1,j-1,k )+sz(i,j-1,k )+sz(i-1,j,k )+sz(i,j,k ))); + + sol(i,j,k) += (rhs(i,j,k) - Ax) / s0; + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_aa (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Array4 const& sig, + Array4 const& msk, + GpuArray const& dxinv, int color) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real facx = Real(1.0/36.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/36.0)*dxinv[1]*dxinv[1]; + Real facz = Real(1.0/36.0)*dxinv[2]*dxinv[2]; + Real fxyz = facx + facy + facz; + Real fmx2y2z = -facx + Real(2.0)*facy + Real(2.0)*facz; + Real f2xmy2z = Real(2.0)*facx - facy + Real(2.0)*facz; + Real f2x2ymz = Real(2.0)*facx + Real(2.0)*facy - facz; + Real f4xm2ym2z = Real(4.0)*facx - Real(2.0)*facy - Real(2.0)*facz; + Real fm2x4ym2z = -Real(2.0)*facx + Real(4.0)*facy - Real(2.0)*facz; + Real fm2xm2y4z = -Real(2.0)*facx - Real(2.0)*facy + Real(4.0)*facz; + + Real s0 = Real(-4.0)*fxyz*(sig(i-1,j-1,k-1)+sig(i,j-1,k-1)+sig(i-1,j,k-1)+sig(i,j,k-1) + +sig(i-1,j-1,k )+sig(i,j-1,k )+sig(i-1,j,k )+sig(i,j,k )); + Real Ax = sol(i,j,k)*s0 + + fxyz*(sol(i-1,j-1,k-1)*sig(i-1,j-1,k-1) + + sol(i+1,j-1,k-1)*sig(i ,j-1,k-1) + + sol(i-1,j+1,k-1)*sig(i-1,j ,k-1) + + sol(i+1,j+1,k-1)*sig(i ,j ,k-1) + + sol(i-1,j-1,k+1)*sig(i-1,j-1,k ) + + sol(i+1,j-1,k+1)*sig(i ,j-1,k ) + + sol(i-1,j+1,k+1)*sig(i-1,j ,k ) + + sol(i+1,j+1,k+1)*sig(i ,j ,k )) + + fmx2y2z*(sol(i ,j-1,k-1)*(sig(i-1,j-1,k-1)+sig(i,j-1,k-1)) + + sol(i ,j+1,k-1)*(sig(i-1,j ,k-1)+sig(i,j ,k-1)) + + sol(i ,j-1,k+1)*(sig(i-1,j-1,k )+sig(i,j-1,k )) + + sol(i ,j+1,k+1)*(sig(i-1,j ,k )+sig(i,j ,k ))) + + f2xmy2z*(sol(i-1,j ,k-1)*(sig(i-1,j-1,k-1)+sig(i-1,j,k-1)) + + sol(i+1,j ,k-1)*(sig(i ,j-1,k-1)+sig(i ,j,k-1)) + + sol(i-1,j ,k+1)*(sig(i-1,j-1,k )+sig(i-1,j,k )) + + sol(i+1,j ,k+1)*(sig(i ,j-1,k )+sig(i ,j,k ))) + + f2x2ymz*(sol(i-1,j-1,k )*(sig(i-1,j-1,k-1)+sig(i-1,j-1,k)) + + sol(i+1,j-1,k )*(sig(i ,j-1,k-1)+sig(i ,j-1,k)) + + sol(i-1,j+1,k )*(sig(i-1,j ,k-1)+sig(i-1,j ,k)) + + sol(i+1,j+1,k )*(sig(i ,j ,k-1)+sig(i ,j ,k))) + + f4xm2ym2z*(sol(i-1,j,k)*(sig(i-1,j-1,k-1)+sig(i-1,j,k-1)+sig(i-1,j-1,k)+sig(i-1,j,k)) + + sol(i+1,j,k)*(sig(i ,j-1,k-1)+sig(i ,j,k-1)+sig(i ,j-1,k)+sig(i ,j,k))) + + fm2x4ym2z*(sol(i,j-1,k)*(sig(i-1,j-1,k-1)+sig(i,j-1,k-1)+sig(i-1,j-1,k)+sig(i,j-1,k)) + + sol(i,j+1,k)*(sig(i-1,j ,k-1)+sig(i,j ,k-1)+sig(i-1,j ,k)+sig(i,j ,k))) + + fm2xm2y4z*(sol(i,j,k-1)*(sig(i-1,j-1,k-1)+sig(i,j-1,k-1)+sig(i-1,j,k-1)+sig(i,j,k-1)) + + sol(i,j,k+1)*(sig(i-1,j-1,k )+sig(i,j-1,k )+sig(i-1,j,k )+sig(i,j,k ))); + + 
sol(i,j,k) += (rhs(i,j,k) - Ax) / s0; + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_c (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, Real sig, + Array4 const& msk, + GpuArray const& dxinv, int color) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real facx = Real(1.0/36.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/36.0)*dxinv[1]*dxinv[1]; + Real facz = Real(1.0/36.0)*dxinv[2]*dxinv[2]; + Real fxyz = facx + facy + facz; + Real fmx2y2z = -facx + Real(2.0)*facy + Real(2.0)*facz; + Real f2xmy2z = Real(2.0)*facx - facy + Real(2.0)*facz; + Real f2x2ymz = Real(2.0)*facx + Real(2.0)*facy - facz; + Real f4xm2ym2z = Real(4.0)*facx - Real(2.0)*facy - Real(2.0)*facz; + Real fm2x4ym2z = -Real(2.0)*facx + Real(4.0)*facy - Real(2.0)*facz; + Real fm2xm2y4z = -Real(2.0)*facx - Real(2.0)*facy + Real(4.0)*facz; + + Real s0 = Real(-4.0)*fxyz*Real(8.); + Real Ax = sol(i,j,k)*s0 + + fxyz*(sol(i-1,j-1,k-1) + + sol(i+1,j-1,k-1) + + sol(i-1,j+1,k-1) + + sol(i+1,j+1,k-1) + + sol(i-1,j-1,k+1) + + sol(i+1,j-1,k+1) + + sol(i-1,j+1,k+1) + + sol(i+1,j+1,k+1)) + + fmx2y2z*(sol(i ,j-1,k-1)*Real(2.) + + sol(i ,j+1,k-1)*Real(2.) + + sol(i ,j-1,k+1)*Real(2.) + + sol(i ,j+1,k+1)*Real(2.)) + + f2xmy2z*(sol(i-1,j ,k-1)*Real(2.) + + sol(i+1,j ,k-1)*Real(2.) + + sol(i-1,j ,k+1)*Real(2.) + + sol(i+1,j ,k+1)*Real(2.)) + + f2x2ymz*(sol(i-1,j-1,k )*Real(2.) + + sol(i+1,j-1,k )*Real(2.) + + sol(i-1,j+1,k )*Real(2.) + + sol(i+1,j+1,k )*Real(2.)) + + f4xm2ym2z*(sol(i-1,j,k)*Real(4.) + + sol(i+1,j,k)*Real(4.)) + + fm2x4ym2z*(sol(i,j-1,k)*Real(4.) + + sol(i,j+1,k)*Real(4.)) + + fm2xm2y4z*(sol(i,j,k-1)*Real(4.) + + sol(i,j,k+1)*Real(4.)); + + sol(i,j,k) += (rhs(i,j,k) - Ax*sig) / (s0*sig); + } + } +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlndlap_gscolor_sten (int i, int j, int k, Array4 const& sol, + Array4 const& rhs, + Array4 const& sten, + Array4 const& msk, int color) noexcept +{ + if (mlndlap_color(i,j,k) == color) { + mlndlap_gauss_seidel_sten(i,j,k,sol,rhs,sten,msk); + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp index da90f84e829..8e490f30348 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp @@ -356,95 +356,38 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& const iMultiFab& dmsk = *m_dirichlet_mask[amrlev][mglev]; #ifdef AMREX_USE_GPU - if (Gpu::inLaunchRegion()) + auto const& solarr_ma = sol.arrays(); + auto const& rhsarr_ma = rhs.const_arrays(); + auto const& dmskarr_ma = dmsk.const_arrays(); +#else + bool regular_coarsening = true; + if (amrlev == 0 && mglev > 0) { - auto solarr_ma = sol.arrays(); - auto rhsarr_ma = rhs.const_arrays(); - auto dmskarr_ma = dmsk.const_arrays(); - if (m_coarsening_strategy == CoarseningStrategy::RAP) - { - auto starr_ma = stencil->const_arrays(); - for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) - { - ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Real Ax = mlndlap_adotx_sten(i,j,k,solarr_ma[box_no],starr_ma[box_no],dmskarr_ma[box_no]); - mlndlap_jacobi_sten(i,j,k,solarr_ma[box_no],Ax,rhsarr_ma[box_no],starr_ma[box_no],dmskarr_ma[box_no]); - }); - } - } - else if (sigma[0] == nullptr) - { - for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) - { - Real const_sigma = m_const_sigma; - ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int 
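The mlndlap_gscolor_* kernels above implement a multi-color Gauss-Seidel sweep: nodes are grouped by the parity of (i, j, k) into eight classes in 3D (four in 2D), and two nodes of the same color never appear in each other's 27-point nodal stencil, so each color pass can be updated in parallel without read/write conflicts. Below is a minimal illustrative sketch of that sweep, not part of the patch, written for plain arrays rather than AMReX containers.

// Editorial sketch (not part of the patch): an eight-color Gauss-Seidel sweep
// over a 3D nodal grid.  Nodes sharing the same (i%2, j%2, k%2) parity are
// never stencil neighbors, so each pass over one color is conflict-free and
// can be parallelized, which is what the ParallelFor-over-colors loop in
// Fsmooth does on the GPU.
inline int node_color (int i, int j, int k) { return (i%2) + (j%2)*2 + (k%2)*4; }

template <typename F>
void gauss_seidel_colored (int nx, int ny, int nz, F const& update_node)
{
    for (int color = 0; color < 8; ++color) {
        for (int k = 0; k < nz; ++k) {
        for (int j = 0; j < ny; ++j) {
        for (int i = 0; i < nx; ++i) {
            if (node_color(i,j,k) == color) {
                update_node(i,j,k);   // e.g. the update in mlndlap_gscolor_aa
            }
        }}}
    }
}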
k) noexcept - { - Real Ax = mlndlap_adotx_c(i,j,k,solarr_ma[box_no],const_sigma,dmskarr_ma[box_no], -#if (AMREX_SPACEDIM == 2) - is_rz, -#endif - dxinvarr); - mlndlap_jacobi_c(i,j,k, solarr_ma[box_no], Ax, rhsarr_ma[box_no], const_sigma, - dmskarr_ma[box_no], dxinvarr); - }); - } - } - else if ((m_use_harmonic_average && mglev > 0) || m_use_mapped) - { - AMREX_D_TERM(MultiArray4 const& sxarr_ma = sigma[0]->const_arrays();, - MultiArray4 const& syarr_ma = sigma[1]->const_arrays();, - MultiArray4 const& szarr_ma = sigma[2]->const_arrays();); - for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) - { - ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Real Ax = mlndlap_adotx_ha(i,j,k,solarr_ma[box_no],AMREX_D_DECL(sxarr_ma[box_no],syarr_ma[box_no],szarr_ma[box_no]), dmskarr_ma[box_no], -#if (AMREX_SPACEDIM == 2) - is_rz, + regular_coarsening = mg_coarsen_ratio_vec[mglev-1] == mg_coarsen_ratio; + } + if (sigma[0] == nullptr) { + AMREX_ALWAYS_ASSERT(regular_coarsening); + } #endif - dxinvarr); - mlndlap_jacobi_ha(i,j,k, solarr_ma[box_no], Ax, rhsarr_ma[box_no], AMREX_D_DECL(sxarr_ma[box_no],syarr_ma[box_no],szarr_ma[box_no]), - dmskarr_ma[box_no], dxinvarr); - }); - } - } - else + + if (m_use_gauss_seidel) + { + if (m_coarsening_strategy == CoarseningStrategy::RAP) { - auto sarr_ma = sigma[0]->const_arrays(); - for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { - ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + auto const& starr_ma = stencil->const_arrays(); + for (int color = 0; color < AMREX_D_TERM(2,*2,*2); ++color) { - Real Ax = mlndlap_adotx_aa(i,j,k,solarr_ma[box_no],sarr_ma[box_no],dmskarr_ma[box_no], -#if (AMREX_SPACEDIM == 2) - is_rz, -#endif - dxinvarr); - mlndlap_jacobi_aa(i,j,k, solarr_ma[box_no], Ax, rhsarr_ma[box_no], sarr_ma[box_no], - dmskarr_ma[box_no], dxinvarr); - }); - } - } - - Gpu::streamSynchronize(); - if (m_smooth_num_sweeps > 1) { nodalSync(amrlev, mglev, sol); } - } - else // cpu + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_gscolor_sten(i,j,k,solarr_ma[box_no],rhsarr_ma[box_no], + starr_ma[box_no],dmskarr_ma[box_no],color); + }); + } + } else #endif - { - bool regular_coarsening = true; - if (amrlev == 0 && mglev > 0) - { - regular_coarsening = mg_coarsen_ratio_vec[mglev-1] == mg_coarsen_ratio; - } - if (sigma[0] == nullptr) { - AMREX_ALWAYS_ASSERT(regular_coarsening); - } - - if (m_use_gauss_seidel) - { - if (m_coarsening_strategy == CoarseningStrategy::RAP) { #ifdef AMREX_USE_OMP #pragma omp parallel @@ -462,9 +405,27 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& } } } - else if (sigma[0] == nullptr) + } + else if (sigma[0] == nullptr) + { + Real const_sigma = m_const_sigma; +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + for (int color = 0; color < AMREX_D_TERM(2,*2,*2); ++color) + { + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_gscolor_c(i,j,k, solarr_ma[box_no], rhsarr_ma[box_no], + const_sigma, dmskarr_ma[box_no], dxinvarr, color +#if (AMREX_SPACEDIM == 2) + ,is_rz +#endif + ); + }); + } + } else +#endif { - Real const_sigma = m_const_sigma; #ifdef AMREX_USE_OMP #pragma omp parallel #endif @@ -485,8 +446,32 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& } } } - else if ( (m_use_harmonic_average && mglev > 0) || m_use_mapped ) + } + else if ( (m_use_harmonic_average 
&& mglev > 0) || m_use_mapped ) + { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + AMREX_D_TERM(MultiArray4 const& sxarr_ma = sigma[0]->const_arrays();, + MultiArray4 const& syarr_ma = sigma[1]->const_arrays();, + MultiArray4 const& szarr_ma = sigma[2]->const_arrays();); + for (int color = 0; color < AMREX_D_TERM(2,*2,*2); ++color) + { + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_gscolor_ha(i,j,k, solarr_ma[box_no], rhsarr_ma[box_no], + AMREX_D_DECL(sxarr_ma[box_no],syarr_ma[box_no],szarr_ma[box_no]), + dmskarr_ma[box_no], dxinvarr, color +#if (AMREX_SPACEDIM == 2) + ,is_rz +#endif + ); + }); + } + } else +#endif + { + #ifdef AMREX_USE_OMP #pragma omp parallel #endif @@ -511,51 +496,94 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& } } } - else + } + else + { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) + { + auto const& sarr_ma = sigma[0]->const_arrays(); + for (int color = 0; color < AMREX_D_TERM(2,*2,*2); ++color) + { + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_gscolor_aa(i,j,k, solarr_ma[box_no], rhsarr_ma[box_no], + sarr_ma[box_no], dmskarr_ma[box_no], dxinvarr, color +#if (AMREX_SPACEDIM == 2) + ,is_rz +#endif + ); + }); + } + } else +#endif { #ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(sol); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); Array4 const& sarr = sigma[0]->const_array(mfi); Array4 const& solarr = sol.array(mfi); Array4 const& rhsarr = rhs.const_array(mfi); Array4 const& dmskarr = dmsk.const_array(mfi); +#ifndef AMREX_USE_GPU if ( regular_coarsening ) +#endif { for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) { mlndlap_gauss_seidel_aa(bx, solarr, rhsarr, sarr, dmskarr, dxinvarr #if (AMREX_SPACEDIM == 2) - ,is_rz + ,is_rz #endif - ); + ); } - } else { + } +#ifndef AMREX_USE_GPU + else { for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) { mlndlap_gauss_seidel_with_line_solve_aa(bx, solarr, rhsarr, sarr, dmskarr, dxinvarr #if (AMREX_SPACEDIM == 2) - ,is_rz + ,is_rz #endif ); } } +#endif } } - - nodalSync(amrlev, mglev, sol); } - else - { - MultiFab Ax(sol.boxArray(), sol.DistributionMap(), 1, 0); - Fapply(amrlev, mglev, Ax, sol); - if (m_coarsening_strategy == CoarseningStrategy::RAP) + Gpu::streamSynchronize(); + nodalSync(amrlev, mglev, sol); + } + else + { + MultiFab Ax(sol.boxArray(), sol.DistributionMap(), 1, 0); + Fapply(amrlev, mglev, Ax, sol); + +#ifdef AMREX_USE_GPU + auto const& Axarr_ma = Ax.const_arrays(); +#endif + + if (m_coarsening_strategy == CoarseningStrategy::RAP) + { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) + { + auto const& starr_ma = stencil->const_arrays(); + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_jacobi_sten(i,j,k,solarr_ma[box_no],Axarr_ma[box_no](i,j,k), + rhsarr_ma[box_no],starr_ma[box_no], + dmskarr_ma[box_no]); + }); + } else +#endif { #ifdef AMREX_USE_OMP #pragma omp parallel @@ -572,9 +600,22 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& mlndlap_jacobi_sten(bx,solarr,Axarr,rhsarr,stenarr,dmskarr); } } - else if (sigma[0] == nullptr) + } + else if (sigma[0] == nullptr) + { + Real const_sigma = m_const_sigma; +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) + { + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_jacobi_c(i,j,k,solarr_ma[box_no],Axarr_ma[box_no](i,j,k), + rhsarr_ma[box_no],const_sigma, + 
dmskarr_ma[box_no], dxinvarr); + }); + } else +#endif { - Real const_sigma = m_const_sigma; #ifdef AMREX_USE_OMP #pragma omp parallel #endif @@ -590,7 +631,23 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& dmskarr, dxinvarr); } } - else if ( (m_use_harmonic_average && mglev > 0) || m_use_mapped ) + } + else if ( (m_use_harmonic_average && mglev > 0) || m_use_mapped ) + { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) + { + AMREX_D_TERM(MultiArray4 const& sxarr_ma = sigma[0]->const_arrays();, + MultiArray4 const& syarr_ma = sigma[1]->const_arrays();, + MultiArray4 const& szarr_ma = sigma[2]->const_arrays();); + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_jacobi_ha(i,j,k,solarr_ma[box_no],Axarr_ma[box_no](i,j,k),rhsarr_ma[box_no], + AMREX_D_DECL(sxarr_ma[box_no],syarr_ma[box_no],szarr_ma[box_no]), + dmskarr_ma[box_no], dxinvarr); + }); + } else +#endif { #ifdef AMREX_USE_OMP #pragma omp parallel @@ -610,7 +667,21 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& dmskarr, dxinvarr); } } - else + } + else + { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) + { + auto const& sarr_ma = sigma[0]->const_arrays(); + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + mlndlap_jacobi_aa(i,j,k,solarr_ma[box_no],Axarr_ma[box_no](i,j,k), + rhsarr_ma[box_no],sarr_ma[box_no], + dmskarr_ma[box_no], dxinvarr); + }); + } else +#endif { #ifdef AMREX_USE_OMP #pragma omp parallel @@ -629,6 +700,8 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& } } } + + Gpu::streamSynchronize(); } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H index 0a5b90a7bab..6fecf13eafb 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H @@ -367,6 +367,12 @@ MLPoissonT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redb { BL_PROFILE("MLPoisson::Fsmooth()"); + MF Ax; + if (! 
this->m_use_gauss_seidel) { // jacobi + Ax.define(sol.boxArray(), sol.DistributionMap(), sol.nComp(), 0); + Fapply(amrlev, mglev, Ax, sol); + } + const auto& undrrelxr = this->m_undrrelxr[amrlev][mglev]; const auto& maskvals = this->m_maskvals [amrlev][mglev]; @@ -443,122 +449,130 @@ MLPoissonT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redb #endif #endif -#if (AMREX_SPACEDIM == 1) if (this->m_overset_mask[amrlev][mglev]) { AMREX_ASSERT(!this->m_has_metric_term); const auto& osmma = this->m_overset_mask[amrlev][mglev]->const_arrays(); - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb_os(i, j, k, solnma[box_no], rhsma[box_no], - osmma[box_no], dhx, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - vbx, redblack); - }); - } else if (this->m_has_metric_term) { - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb_m(i, j, k, solnma[box_no], rhsma[box_no], dhx, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - vbx, redblack, - dx, probxlo); - }); - } else { - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb(i, j, k, solnma[box_no], rhsma[box_no], dhx, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - vbx, redblack); - }); - } + if (this->m_use_gauss_seidel) { + ParallelFor(sol, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + Box vbx(rhsma[box_no]); + mlpoisson_gsrb_os(i, j, k, solnma[box_no], rhsma[box_no], + osmma[box_no], AMREX_D_DECL(dhx, dhy, dhz), + f0ma[box_no], m0ma[box_no], + f1ma[box_no], m1ma[box_no], +#if (AMREX_SPACEDIM > 1) + f2ma[box_no], m2ma[box_no], + f3ma[box_no], m3ma[box_no], +#if (AMREX_SPACEDIM > 2) + f4ma[box_no], m4ma[box_no], + f5ma[box_no], m5ma[box_no], #endif - -#if (AMREX_SPACEDIM == 2) - if (this->m_overset_mask[amrlev][mglev]) { - AMREX_ASSERT(!this->m_has_metric_term); - const auto& osmma = this->m_overset_mask[amrlev][mglev]->const_arrays(); - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb_os(i, j, k, solnma[box_no], rhsma[box_no], - osmma[box_no], dhx, dhy, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - f2ma[box_no], m2ma[box_no], - f3ma[box_no], m3ma[box_no], - vbx, redblack); - }); - } else if (this->m_has_metric_term) { - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb_m(i, j, k, solnma[box_no], rhsma[box_no], dhx, dhy, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - f2ma[box_no], m2ma[box_no], - f3ma[box_no], m3ma[box_no], - vbx, redblack, - dx, probxlo); - }); - } else { - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb(i, j, k, solnma[box_no], rhsma[box_no], dhx, dhy, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - f2ma[box_no], m2ma[box_no], - f3ma[box_no], m3ma[box_no], - vbx, redblack); - }); +#endif + vbx, redblack); + }); + } else { + const auto& axma = Ax.const_arrays(); + ParallelFor(sol, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + Box vbx(rhsma[box_no]); + mlpoisson_jacobi_os(i, j, k, solnma[box_no], rhsma[box_no], + axma[box_no], osmma[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + f0ma[box_no], m0ma[box_no], + f1ma[box_no], 
m1ma[box_no], +#if (AMREX_SPACEDIM > 1) + f2ma[box_no], m2ma[box_no], + f3ma[box_no], m3ma[box_no], +#if (AMREX_SPACEDIM > 2) + f4ma[box_no], m4ma[box_no], + f5ma[box_no], m5ma[box_no], +#endif +#endif + vbx); + }); + } } +#if (AMREX_SPACEDIM < 3) + else if (this->m_has_metric_term) { + if (this->m_use_gauss_seidel) { + ParallelFor(sol, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + Box vbx(rhsma[box_no]); + mlpoisson_gsrb_m(i, j, k, solnma[box_no], rhsma[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + f0ma[box_no], m0ma[box_no], + f1ma[box_no], m1ma[box_no], +#if (AMREX_SPACEDIM > 1) + f2ma[box_no], m2ma[box_no], + f3ma[box_no], m3ma[box_no], #endif - -#if (AMREX_SPACEDIM == 3) - if (this->m_overset_mask[amrlev][mglev]) { - AMREX_ASSERT(!this->m_has_metric_term); - const auto& osmma = this->m_overset_mask[amrlev][mglev]->const_arrays(); - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb_os(i, j, k, solnma[box_no], rhsma[box_no], - osmma[box_no], dhx, dhy, dhz, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - f2ma[box_no], m2ma[box_no], - f3ma[box_no], m3ma[box_no], - f4ma[box_no], m4ma[box_no], - f5ma[box_no], m5ma[box_no], - vbx, redblack); - }); - } else { - ParallelFor(sol, - [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept - { - Box vbx(rhsma[box_no]); - mlpoisson_gsrb(i, j, k, solnma[box_no], rhsma[box_no], dhx, dhy, dhz, - f0ma[box_no], m0ma[box_no], - f1ma[box_no], m1ma[box_no], - f2ma[box_no], m2ma[box_no], - f3ma[box_no], m3ma[box_no], - f4ma[box_no], m4ma[box_no], - f5ma[box_no], m5ma[box_no], - vbx, redblack); - }); + vbx, redblack, + dx, probxlo); + }); + } else { + const auto& axma = Ax.const_arrays(); + ParallelFor(sol, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + Box vbx(rhsma[box_no]); + mlpoisson_jacobi_m(i, j, k, solnma[box_no], rhsma[box_no], + axma[box_no], AMREX_D_DECL(dhx, dhy, dhz), + f0ma[box_no], m0ma[box_no], + f1ma[box_no], m1ma[box_no], +#if (AMREX_SPACEDIM > 1) + f2ma[box_no], m2ma[box_no], + f3ma[box_no], m3ma[box_no], +#endif + vbx, dx, probxlo); + }); + } } #endif + else { + if (this->m_use_gauss_seidel) { + ParallelFor(sol, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + Box vbx(rhsma[box_no]); + mlpoisson_gsrb(i, j, k, solnma[box_no], rhsma[box_no], + AMREX_D_DECL(dhx, dhy, dhz), + f0ma[box_no], m0ma[box_no], + f1ma[box_no], m1ma[box_no], +#if (AMREX_SPACEDIM > 1) + f2ma[box_no], m2ma[box_no], + f3ma[box_no], m3ma[box_no], +#if (AMREX_SPACEDIM > 2) + f4ma[box_no], m4ma[box_no], + f5ma[box_no], m5ma[box_no], +#endif +#endif + vbx, redblack); + }); + } else { + const auto& axma = Ax.const_arrays(); + ParallelFor(sol, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + Box vbx(rhsma[box_no]); + mlpoisson_jacobi(i, j, k, solnma[box_no], rhsma[box_no], + axma[box_no], AMREX_D_DECL(dhx, dhy, dhz), + f0ma[box_no], m0ma[box_no], + f1ma[box_no], m1ma[box_no], +#if (AMREX_SPACEDIM > 1) + f2ma[box_no], m2ma[box_no], + f3ma[box_no], m3ma[box_no], +#if (AMREX_SPACEDIM > 2) + f4ma[box_no], m4ma[box_no], + f5ma[box_no], m5ma[box_no], +#endif +#endif + vbx); + }); + } + } } else #endif { @@ -598,30 +612,64 @@ MLPoissonT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redb if (this->m_overset_mask[amrlev][mglev]) { AMREX_ASSERT(!this->m_has_metric_term); const auto& osm = this->m_overset_mask[amrlev][mglev]->const_array(mfi); - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( 
tbx, i, j, k, - { - mlpoisson_gsrb_os(i, j, k, solnfab, rhsfab, osm, dhx, - f0fab, m0, - f1fab, m1, - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb_os(i, j, k, solnfab, rhsfab, osm, dhx, + f0fab, m0, + f1fab, m1, + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi_os(i, j, k, solnfab, rhsfab, axfab, + osm, dhx, + f0fab, m0, + f1fab, m1, + vbx); + }); + } } else if (this->m_has_metric_term) { - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb_m(i, j, k, solnfab, rhsfab, dhx, - f0fab, m0, - f1fab, m1, - vbx, redblack, - dx, probxlo); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb_m(i, j, k, solnfab, rhsfab, dhx, + f0fab, m0, + f1fab, m1, + vbx, redblack, + dx, probxlo); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi_m(i, j, k, solnfab, rhsfab, axfab, dhx, + f0fab, m0, + f1fab, m1, + vbx, dx, probxlo); + }); + } } else { - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb(i, j, k, solnfab, rhsfab, dhx, - f0fab, m0, - f1fab, m1, - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb(i, j, k, solnfab, rhsfab, dhx, + f0fab, m0, + f1fab, m1, + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi(i, j, k, solnfab, rhsfab, axfab, dhx, + f0fab, m0, + f1fab, m1, + vbx); + }); + } } #endif @@ -629,55 +677,110 @@ MLPoissonT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redb if (this->m_overset_mask[amrlev][mglev]) { AMREX_ASSERT(!this->m_has_metric_term); const auto& osm = this->m_overset_mask[amrlev][mglev]->const_array(mfi); - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb_os(i, j, k, solnfab, rhsfab, osm, dhx, dhy, - f0fab, m0, - f1fab, m1, - f2fab, m2, - f3fab, m3, - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb_os(i, j, k, solnfab, rhsfab, osm, dhx, dhy, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi_os(i, j, k, solnfab, rhsfab, axfab, + osm, dhx, dhy, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + vbx); + }); + } } else if (this->m_has_metric_term) { - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb_m(i, j, k, solnfab, rhsfab, dhx, dhy, - f0fab, m0, - f1fab, m1, - f2fab, m2, - f3fab, m3, - vbx, redblack, - dx, probxlo); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb_m(i, j, k, solnfab, rhsfab, dhx, dhy, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + vbx, redblack, + dx, probxlo); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi_m(i, j, k, solnfab, rhsfab, axfab, dhx, dhy, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + vbx, dx, probxlo); + }); + } } else { - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb(i, j, k, solnfab, rhsfab, dhx, dhy, - f0fab, m0, - f1fab, 
m1, - f2fab, m2, - f3fab, m3, - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb(i, j, k, solnfab, rhsfab, dhx, dhy, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi(i, j, k, solnfab, rhsfab, axfab, dhx, dhy, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + vbx); + }); + } } - #endif #if (AMREX_SPACEDIM == 3) if (this->m_overset_mask[amrlev][mglev]) { AMREX_ASSERT(!this->m_has_metric_term); const auto& osm = this->m_overset_mask[amrlev][mglev]->const_array(mfi); - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb_os(i, j, k, solnfab, rhsfab, osm, dhx, dhy, dhz, - f0fab, m0, - f1fab, m1, - f2fab, m2, - f3fab, m3, - f4fab, m4, - f5fab, m5, - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb_os(i, j, k, solnfab, rhsfab, osm, dhx, dhy, dhz, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + f4fab, m4, + f5fab, m5, + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi_os(i, j, k, solnfab, rhsfab, axfab, + osm, dhx, dhy, dhz, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + f4fab, m4, + f5fab, m5, + vbx); + }); + } } else if (this->hasHiddenDimension()) { Box const& tbx_2d = this->compactify(tbx); Box const& vbx_2d = this->compactify(vbx); @@ -691,27 +794,58 @@ MLPoissonT::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redb const auto& m1_2d = this->compactify(this->get_d1(m0,m1,m2)); const auto& m2_2d = this->compactify(this->get_d0(m3,m4,m5)); const auto& m3_2d = this->compactify(this->get_d1(m3,m4,m5)); - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx_2d, i, j, k, - { - TwoD::mlpoisson_gsrb(i, j, k, solnfab_2d, rhsfab_2d, dh0, dh1, - f0fab_2d, m0_2d, - f1fab_2d, m1_2d, - f2fab_2d, m2_2d, - f3fab_2d, m3_2d, - vbx_2d, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx_2d, i, j, k, + { + TwoD::mlpoisson_gsrb(i, j, k, solnfab_2d, rhsfab_2d, dh0, dh1, + f0fab_2d, m0_2d, + f1fab_2d, m1_2d, + f2fab_2d, m2_2d, + f3fab_2d, m3_2d, + vbx_2d, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + const auto& axfab_2d = this->compactify(axfab); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx_2d, i, j, k, + { + TwoD::mlpoisson_jacobi(i, j, k, solnfab_2d, rhsfab_2d, + axfab_2d, dh0, dh1, + f0fab_2d, m0_2d, + f1fab_2d, m1_2d, + f2fab_2d, m2_2d, + f3fab_2d, m3_2d, + vbx_2d); + }); + } } else { - AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, - { - mlpoisson_gsrb(i, j, k, solnfab, rhsfab, dhx, dhy, dhz, - f0fab, m0, - f1fab, m1, - f2fab, m2, - f3fab, m3, - f4fab, m4, - f5fab, m5, - vbx, redblack); - }); + if (this->m_use_gauss_seidel) { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_gsrb(i, j, k, solnfab, rhsfab, dhx, dhy, dhz, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + f4fab, m4, + f5fab, m5, + vbx, redblack); + }); + } else { + const auto& axfab = Ax.const_array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D ( tbx, i, j, k, + { + mlpoisson_jacobi(i, j, k, solnfab, rhsfab, axfab, + dhx, dhy, dhz, + f0fab, m0, + f1fab, m1, + f2fab, m2, + f3fab, m3, + f4fab, m4, + f5fab, m5, + vbx); + }); + } } #endif } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson_1D_K.H 
b/Src/LinearSolvers/MLMG/AMReX_MLPoisson_1D_K.H index 071e97b4ea7..59257cf8b1c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson_1D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson_1D_K.H @@ -188,6 +188,81 @@ void mlpoisson_gsrb_m (int i, int, int, Array4 const& phi, Array4 co } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi (int i, int, int, Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, T dhx, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T gamma = -dhx*T(2.0); + + T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0) + ? f0(vlo.x,0,0) : T(0.0); + T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0) + ? f1(vhi.x,0,0) : T(0.0); + + T g_m_d = gamma + dhx*(cf0+cf1); + + phi(i,0,0) += T(2.0/3.0) * (rhs(i,0,0) - Ax(i,0,0)) / g_m_d; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi_os (int i, int, int, Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, Array4 const& osm, T dhx, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + if (osm(i,0,0) == 0) { + phi(i,0,0) = T(0.0); + } else { + T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0) + ? f0(vlo.x,0,0) : T(0.0); + T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0) + ? f1(vhi.x,0,0) : T(0.0); + + T gamma = -dhx*T(2.0); + T g_m_d = gamma + dhx*(cf0+cf1); + + phi(i,0,0) += T(2.0/3.0) * (rhs(i,0,0) - Ax(i,0,0)) / g_m_d; + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi_m (int i, int, int, Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, T dhx, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Box const& vbox, T dx, T probxlo) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0) + ? f0(vlo.x,0,0) : T(0.0); + T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0) + ? f1(vhi.x,0,0) : T(0.0); + + T rel = (probxlo + i *dx) * (probxlo + i *dx); + T rer = (probxlo +(i+1)*dx) * (probxlo +(i+1)*dx); + + T gamma = -dhx*(rel+rer); + + T g_m_d = gamma + dhx*(rel*cf0+rer*cf1); + + phi(i,0,0) += T(2.0/3.0) * (rhs(i,0,0) - Ax(i,0,0)) / g_m_d; +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlpoisson_normalize (int i, int, int, Array4 const& x, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson_2D_K.H index 9604de38feb..5feba2a0066 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson_2D_K.H @@ -305,6 +305,102 @@ void mlpoisson_gsrb_m (int i, int j, int, Array4 const& phi, Array4 } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi (int i, int j, int, Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, T dhx, T dhy, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Array4 const& f2, Array4 const& m2, + Array4 const& f3, Array4 const& m3, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T gamma = T(-2.0)*(dhx+dhy); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0) + ? f0(vlo.x,j,0) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0) + ? f1(i,vlo.y,0) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0) + ? 
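The new mlpoisson_jacobi, mlpoisson_jacobi_os, and mlpoisson_jacobi_m kernels above (and their 2D/3D counterparts that follow) all perform the same damped Jacobi update, phi += (2/3) * (rhs - A*phi) / diag, with A*phi precomputed once per sweep by Fapply and diag being the operator diagonal adjusted by the boundary factors f*/m*. A standalone illustrative sketch of that update for a 1D constant-coefficient operator, not part of the patch:

// Editorial sketch (not part of the patch): the damped Jacobi relaxation the
// new mlpoisson_jacobi kernels apply, spelled out for a 1D constant-coefficient
// Poisson operator on plain arrays.  Ax must hold A*phi for the current iterate
// (Fapply provides it in the patch); 2/3 is the standard damping weight for
// Jacobi smoothing.
void damped_jacobi_1d (int n, double* phi, double const* rhs,
                       double const* Ax, double dhx)
{
    double diag = -2.0*dhx;   // diagonal entry of the 1D Laplacian stencil
    for (int i = 0; i < n; ++i) {
        phi[i] += (2.0/3.0) * (rhs[i] - Ax[i]) / diag;
    }
}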
f2(vhi.x,j,0) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0) + ? f3(i,vhi.y,0) : T(0.0); + + T g_m_d = gamma + dhx*(cf0+cf2) + dhy*(cf1+cf3); + + phi(i,j,0) += T(2.0/3.0) * (rhs(i,j,0) - Ax(i,j,0)) / g_m_d; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi_os (int i, int j, int, Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, Array4 const& osm, + T dhx, T dhy, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Array4 const& f2, Array4 const& m2, + Array4 const& f3, Array4 const& m3, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T gamma = T(-2.0)*(dhx+dhy); + + if (osm(i,j,0) == 0) { + phi(i,j,0) = T(0.0); + } else { + T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0) + ? f0(vlo.x,j,0) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0) + ? f1(i,vlo.y,0) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0) + ? f2(vhi.x,j,0) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0) + ? f3(i,vhi.y,0) : T(0.0); + + T g_m_d = gamma + dhx*(cf0+cf2) + dhy*(cf1+cf3); + + phi(i,j,0) += T(2.0/3.0) * (rhs(i,j,0) - Ax(i,j,0)) / g_m_d; + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi_m (int i, int j, int, Array4 const& phi, Array4 const& rhs, + Array4 const& Ax, T dhx, T dhy, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Array4 const& f2, Array4 const& m2, + Array4 const& f3, Array4 const& m3, + Box const& vbox, T dx, T probxlo) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0) + ? f0(vlo.x,j,0) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0) + ? f1(i,vlo.y,0) : T(0.0); + T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0) + ? f2(vhi.x,j,0) : T(0.0); + T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0) + ? f3(i,vhi.y,0) : T(0.0); + + T rel = probxlo + i*dx; + T rer = probxlo +(i+1)*dx; + T rc = probxlo + (i+T(0.5))*dx; + + T gamma = -dhx*(rel+rer) - T(2.0)*dhy*rc; + + T g_m_d = gamma + dhx*(rel*cf0+rer*cf2) + dhy*rc*(cf1+cf3); + + phi(i,j,0) += T(2.0/3.0) * (rhs(i,j,0) - Ax(i,j,0)) / g_m_d; +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlpoisson_normalize (int i, int j, int, Array4 const& x, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson_3D_K.H index e0823294d03..fa23bc4b6dd 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson_3D_K.H @@ -245,6 +245,84 @@ void mlpoisson_gsrb_os (int i, int j, int k, Array4 const& phi, } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi (int i, int j, int k, Array4 const& phi, + Array4 const& rhs, Array4 const& Ax, + T dhx, T dhy, T dhz, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Array4 const& f2, Array4 const& m2, + Array4 const& f3, Array4 const& m3, + Array4 const& f4, Array4 const& m4, + Array4 const& f5, Array4 const& m5, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + const T gamma = T(-2.)*(dhx+dhy+dhz); + + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? 
f3(vhi.x,j,k) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z) : T(0.0); + + T g_m_d = gamma + dhx*(cf0+cf3) + dhy*(cf1+cf4) + dhz*(cf2+cf5); + + phi(i,j,k) += T(2.0/3.0) * (rhs(i,j,k) - Ax(i,j,k)) / g_m_d; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlpoisson_jacobi_os (int i, int j, int k, Array4 const& phi, + Array4 const& rhs, + Array4 const& Ax, + Array4 const& osm, + T dhx, T dhy, T dhz, + Array4 const& f0, Array4 const& m0, + Array4 const& f1, Array4 const& m1, + Array4 const& f2, Array4 const& m2, + Array4 const& f3, Array4 const& m3, + Array4 const& f4, Array4 const& m4, + Array4 const& f5, Array4 const& m5, + Box const& vbox) noexcept +{ + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + + const T gamma = T(-2.)*(dhx+dhy+dhz); + + if (osm(i,j,k) == 0) { + phi(i,j,k) = T(0.0); + } else { + T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0) + ? f0(vlo.x,j,k) : T(0.0); + T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0) + ? f1(i,vlo.y,k) : T(0.0); + T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0) + ? f2(i,j,vlo.z) : T(0.0); + T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0) + ? f3(vhi.x,j,k) : T(0.0); + T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0) + ? f4(i,vhi.y,k) : T(0.0); + T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0) + ? f5(i,j,vhi.z) : T(0.0); + + T g_m_d = gamma + dhx*(cf0+cf3) + dhy*(cf1+cf4) + dhz*(cf2+cf5); + + phi(i,j,k) += T(2.0/3.0) * (rhs(i,j,k) - Ax(i,j,k)) / g_m_d; + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_PCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_PCGSolver.H new file mode 100644 index 00000000000..1b9aa55426a --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_PCGSolver.H @@ -0,0 +1,72 @@ +#ifndef AMREX_PCG_SOLVER_H_ +#define AMREX_PCG_SOLVER_H_ +#include + +#include +#include +#include +#include + +namespace amrex { + +/** + * \brief Preconditioned conjugate gradient solver + * + * \param x initial guess + * \param r initial residual + * \param mat matrix + * \param precond preconditioner + * \param maxiter max number of iterations + * \param rel_tol relative tolerance + */ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int pcg_solve (T* AMREX_RESTRICT x, T* AMREX_RESTRICT r, + M const& mat, P const& precond, int maxiter, T rel_tol) +{ + static_assert(std::is_floating_point_v); + + T rnorm0 = 0; + for (int i = 0; i < N; ++i) { + rnorm0 = std::max(rnorm0, std::abs(r[i])); + } + if (rnorm0 == 0) { return 0; } + + int iter = 0; + T rho_prev = T(1.0); // initialized to quiet gcc warning + T p[N]; + for (iter = 1; iter <= maxiter; ++iter) { + T z[N]; + precond(z, r); + T rho = 0; + for (int i = 0; i < N; ++i) { rho += r[i]*z[i]; } + if (rho == 0) { break; } + if (iter == 1) { + for (int i = 0; i < N; ++i) { p[i] = z[i]; } + } else { + auto rr = rho * (T(1.0)/rho_prev); + for (int i = 0; i < N; ++i) { + p[i] = z[i] + rr * p[i]; + } + } + T q[N]; + mat(q, p); + T pq = 0; + for (int i = 0; i < N; ++i) { pq += p[i]*q[i]; } + if (pq == 0) { break; } + T alpha = rho * (T(1.0)/pq); + T rnorm = 0; + for (int i = 0; i < N; ++i) { + x[i] += alpha * p[i]; + r[i] -= alpha * q[i]; + rnorm = std::max(rnorm, std::abs(r[i])); + } + if (rnorm <= rnorm0*rel_tol) { break; } + rho_prev = rho; + } + return iter; +} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/Make.package b/Src/LinearSolvers/MLMG/Make.package index a8f267d4c26..3609164c919 100644 --- a/Src/LinearSolvers/MLMG/Make.package +++ b/Src/LinearSolvers/MLMG/Make.package @@ -22,7 
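The new AMReX_PCGSolver.H above adds pcg_solve, a fixed-size preconditioned conjugate gradient kernel that can run inside device code. A hedged usage sketch follows; the 3x3 system, the function name, and the Jacobi preconditioner are all hypothetical, and the array length N is assumed to be the leading template parameter, as the fixed-size loops in the header suggest.

// Editorial usage sketch (not part of the patch): solve a small SPD system
// A x = b with pcg_solve, preconditioning with the inverse diagonal of A.
#include <AMReX_REAL.H>
#include <AMReX_PCGSolver.H>

int pcg_example ()
{
    constexpr int N = 3;
    amrex::Real A[N][N] = {{4., 1., 0.},
                           {1., 3., 1.},
                           {0., 1., 2.}};
    amrex::Real b[N] = {1., 2., 3.};
    amrex::Real x[N] = {0., 0., 0.};        // initial guess
    amrex::Real r[N] = {b[0], b[1], b[2]};  // initial residual b - A*x for x = 0

    auto mat = [&] (amrex::Real* q, amrex::Real const* p) {   // q = A*p
        for (int i = 0; i < N; ++i) {
            q[i] = 0.;
            for (int j = 0; j < N; ++j) { q[i] += A[i][j]*p[j]; }
        }
    };
    auto precond = [&] (amrex::Real* z, amrex::Real const* res) {  // z = D^{-1} res
        for (int i = 0; i < N; ++i) { z[i] = res[i]/A[i][i]; }
    };

    int nit = amrex::pcg_solve<N>(x, r, mat, precond, 50, amrex::Real(1.e-8));
    return nit;   // x now holds the solution; nit is the iteration count
}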
+22,7 @@ CEXE_sources += AMReX_MLNodeLinOp.cpp CEXE_headers += AMReX_MLCellABecLap.H CEXE_headers += AMReX_MLCellABecLap_K.H AMReX_MLCellABecLap_$(DIM)D_K.H -CEXE_headers += AMReX_MLCGSolver.H +CEXE_headers += AMReX_MLCGSolver.H AMReX_PCGSolver.H CEXE_headers += AMReX_MLABecLaplacian.H CEXE_headers += AMReX_MLABecLap_K.H AMReX_MLABecLap_$(DIM)D_K.H @@ -96,4 +96,6 @@ endif VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/MLMG INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/MLMG +include $(AMREX_HOME)/Src/Boundary/Make.package + endif diff --git a/Src/Particle/AMReX_BinIterator.H b/Src/Particle/AMReX_BinIterator.H index f57b61a0371..a5422a48fac 100644 --- a/Src/Particle/AMReX_BinIterator.H +++ b/Src/Particle/AMReX_BinIterator.H @@ -22,7 +22,7 @@ constexpr bool IsParticleTileData (Args...) { template struct BinIterator { - using index_type = unsigned int; + using index_type = int; using const_pointer_type = std::conditional_t(), T, diff --git a/Src/Particle/AMReX_DenseBins.H b/Src/Particle/AMReX_DenseBins.H index 24c9cc293cb..ec5aebd6a20 100644 --- a/Src/Particle/AMReX_DenseBins.H +++ b/Src/Particle/AMReX_DenseBins.H @@ -30,7 +30,7 @@ namespace BinPolicy template struct DenseBinIteratorFactory { - using index_type = unsigned int; + using index_type = int; using const_pointer_type = std::conditional_t(), T, @@ -78,7 +78,7 @@ class DenseBins public: using BinIteratorFactory = DenseBinIteratorFactory; - using index_type = unsigned int; + using index_type = int; using const_pointer_type = std::conditional_t(), T, diff --git a/Src/Particle/AMReX_NeighborList.H b/Src/Particle/AMReX_NeighborList.H index 53434ec7c3c..6233a41002a 100644 --- a/Src/Particle/AMReX_NeighborList.H +++ b/Src/Particle/AMReX_NeighborList.H @@ -310,8 +310,8 @@ public: m_pstruct = aos().dataPtr(); auto* const pstruct_ptr = aos().dataPtr(); - const size_t np_total = aos.size(); - const size_t np_real = src_tile.numRealParticles(); + const int np_total = aos.size(); + const int np_real = src_tile.numRealParticles(); auto const* off_bins_p = off_bins_v.data(); auto const* dxi_p = dxi_v.data(); @@ -333,7 +333,7 @@ public: // First pass: count the number of neighbors for each particle //--------------------------------------------------------------------------------------------------------- - const size_t np_size = (num_bin_types > 1) ? np_total : np_real; + const int np_size = (num_bin_types > 1) ? 
np_total : np_real; m_nbor_counts.resize( np_size+1, 0); m_nbor_offsets.resize(np_size+1); diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index ca1747b6b8b..8e5bb2b2563 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H +++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -1065,7 +1065,7 @@ selectActualNeighbors (CheckPair const& check_pair, int num_cells) if (isActualNeighbor) { break; } int nbr_cell_id = (ii * ny + jj) * nz + kk; for (auto p = poffset[nbr_cell_id]; p < poffset[nbr_cell_id+1]; ++p) { - if (pperm[p] == i) { continue; } + if (pperm[p] == int(i)) { continue; } if (detail::call_check_pair(check_pair, ptile_data, ptile_data, i, pperm[p])) { IntVect cell_ijk = getParticleCell(pstruct[pperm[p]], plo, dxi, domain); if (!box.contains(cell_ijk)) { diff --git a/Src/Particle/AMReX_ParticleContainerBase.cpp b/Src/Particle/AMReX_ParticleContainerBase.cpp index 7b405681e4d..4f0ec83f4cb 100644 --- a/Src/Particle/AMReX_ParticleContainerBase.cpp +++ b/Src/Particle/AMReX_ParticleContainerBase.cpp @@ -198,7 +198,7 @@ int ParticleContainerBase::MaxReaders () first = false; ParmParse pp("particles"); Max_Readers = Max_Readers_def; - pp.queryAdd("nreaders", Max_Readers); + pp.query("nreaders", Max_Readers); Max_Readers = std::min(ParallelDescriptor::NProcs(),Max_Readers); if (Max_Readers <= 0) { @@ -224,7 +224,7 @@ Long ParticleContainerBase::MaxParticlesPerRead () first = false; ParmParse pp("particles"); Max_Particles_Per_Read = Max_Particles_Per_Read_def; - pp.queryAdd("nparts_per_read", Max_Particles_Per_Read); + pp.query("nparts_per_read", Max_Particles_Per_Read); if (Max_Particles_Per_Read <= 0) { amrex::Abort("particles.nparts_per_read must be positive"); @@ -244,7 +244,7 @@ const std::string& ParticleContainerBase::AggregationType () first = false; aggregation_type = "None"; ParmParse pp("particles"); - pp.queryAdd("aggregation_type", aggregation_type); + pp.query("aggregation_type", aggregation_type); if (!(aggregation_type == "None" || aggregation_type == "Cell")) { amrex::Abort("particles.aggregation_type not implemented."); @@ -264,7 +264,7 @@ int ParticleContainerBase::AggregationBuffer () first = false; aggregation_buffer = 2; ParmParse pp("particles"); - pp.queryAdd("aggregation_buffer", aggregation_buffer); + pp.query("aggregation_buffer", aggregation_buffer); if (aggregation_buffer <= 0) { amrex::Abort("particles.aggregation_buffer must be positive"); diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index cd2d030e55c..74e65b792f0 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -56,8 +56,8 @@ ParticleContainer_impl::value, // "Particle type must be standard layout and trivial."); - pp.queryAdd("use_prepost", usePrePost); - pp.queryAdd("do_unlink", doUnlink); + pp.query("use_prepost", usePrePost); + pp.query("do_unlink", doUnlink); pp.queryAdd("do_mem_efficient_sort", memEfficientSort); initialized = true; @@ -71,9 +71,9 @@ IntVect ParticleContainer_impl::Index (const P& p, int lev) const { const Geometry& geom = Geom(lev); - const auto domain = geom.Domain(); - const auto plo = geom.ProbLoArray(); - const auto dxi = geom.InvCellSizeArray(); + const auto& domain = geom.Domain(); + const auto& plo = geom.ProbLoArray(); + const auto& dxi = geom.InvCellSizeArray(); return Assignor{}(p, plo, dxi, domain); } diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index ca40256620e..01ab0ded869 100644 --- 
a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -651,7 +651,7 @@ ParticleContainer_impl AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell (P const& p, amrex::GpuArray const& plo, - amrex::GpuArray const& dxi, - const Box& domain) noexcept + amrex::GpuArray const& dxi) noexcept { IntVect iv( AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])), int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2])))); + return iv; +} + +/** + * \brief Returns the cell index for a given particle using the + * provided lower bounds, cell sizes and global domain offset. + * + * This version indexes cells starting from 0 at the lower left corner of + * the simulation geometry, i.e., it returns a global index. + * + * \tparam P a type of AMReX particle. + * + * \param p the particle for which the cell index is calculated + * \param plo the low end of the domain + * \param dxi cell sizes in each dimension + * \param domain AMReX box in which the given particle resides + */ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +IntVect getParticleCell (P const& p, + amrex::GpuArray const& plo, + amrex::GpuArray const& dxi, + const Box& domain) noexcept +{ + IntVect iv = getParticleCell(p, plo, dxi); iv += domain.smallEnd(); return iv; } diff --git a/Src/Particle/AMReX_SparseBins.H b/Src/Particle/AMReX_SparseBins.H index fb026a8ca88..1629a997597 100644 --- a/Src/Particle/AMReX_SparseBins.H +++ b/Src/Particle/AMReX_SparseBins.H @@ -14,7 +14,7 @@ template struct SparseBinIteratorFactory { - using index_type = unsigned int; + using index_type = int; using const_pointer_type = std::conditional_t(), T, @@ -33,7 +33,7 @@ struct SparseBinIteratorFactory : m_bins_ptr(bins.dataPtr()), m_offsets_ptr(offsets.dataPtr()), m_permutation_ptr(permutation.dataPtr()), - m_items(items), m_num_bins(bins.size()) + m_items(items), m_num_bins(int(bins.size())) {} [[nodiscard]] AMREX_GPU_HOST_DEVICE @@ -97,7 +97,7 @@ public: using BinIteratorFactory = SparseBinIteratorFactory; using bin_type = IntVect; - using index_type = unsigned int; + using index_type = int; using const_pointer_type = std::conditional_t(), T, diff --git a/Src/Particle/AMReX_TracerParticle_mod_K.H b/Src/Particle/AMReX_TracerParticle_mod_K.H index ee85f61fac4..808c2b333b8 100644 --- a/Src/Particle/AMReX_TracerParticle_mod_K.H +++ b/Src/Particle/AMReX_TracerParticle_mod_K.H @@ -134,7 +134,7 @@ void linear_interpolate_to_particle (const P& p, amrex::Real sz[] = {amrex::Real(1.0) - zint, zint}; #endif - for (int comp = start_comp; comp < ncomp; ++comp) { + for (int comp = start_comp; comp < start_comp + ncomp; ++comp) { val[ctr] = ParticleReal(0.0); #if (AMREX_SPACEDIM > 2) for (int kk = 0; kk <=1; ++kk) { @@ -378,7 +378,7 @@ void linear_interpolate_to_particle_z (const P& p, hint_ilojhi, hint_ihijhi}; #endif - for (int comp = start_comp; comp < ncomp; ++comp) { + for (int comp = start_comp; comp < start_comp + ncomp; ++comp) { val[ctr] = amrex::ParticleReal(0.); #if (AMREX_SPACEDIM == 2) int k0 = 0; @@ -550,7 +550,7 @@ void linear_interpolate_to_particle_mapped (const P& p, int i = p.idata(0); int j = p.idata(1); - for (int comp = start_comp; comp < ncomp; ++comp) { + for (int comp = start_comp; comp < start_comp + ncomp; ++comp) { #if (AMREX_SPACEDIM == 2) // Value of data at surrounding nodes diff --git a/Src/Particle/AMReX_TracerParticles.cpp b/Src/Particle/AMReX_TracerParticles.cpp index 339cb58cfa8..849fb086a87 100644 --- a/Src/Particle/AMReX_TracerParticles.cpp +++ 
b/Src/Particle/AMReX_TracerParticles.cpp @@ -223,7 +223,7 @@ TracerParticleContainer::Timestamp (const std::string& basename, // We'll spread the output over this many files. int nOutFiles(64); ParmParse pp("particles"); - pp.queryAdd("particles_nfiles",nOutFiles); + pp.query("particles_nfiles",nOutFiles); if(nOutFiles == -1) { nOutFiles = NProcs; } diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 10fd9e2febd..1bea38256dc 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -675,7 +675,7 @@ void WriteBinaryParticleDataSync (PC const& pc, HdrFile.close(); if ( ! HdrFile.good()) { - amrex::Abort("ParticleContainer::Checkpoint(): problem writing HdrFile"); + amrex::Abort("amrex::WriteBinaryParticleDataSync(): problem writing HdrFile"); } } } @@ -969,7 +969,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, HdrFile.close(); if ( ! HdrFile.good()) { - amrex::Abort("ParticleContainer::Checkpoint(): problem writing HdrFile"); + amrex::Abort("amrex::WriteBinaryParticleDataAsync(): problem writing HdrFile"); } } diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 99d72633b65..29489e8d4fe 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -121,7 +121,9 @@ else() # # List of subdirectories to search for CMakeLists. # - set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Reinit Amr CLZ Parser Parser2 CTOParFor RoundoffDomain) + set( AMREX_TESTS_SUBDIRS Amr AsyncOut CLZ CTOParFor DeviceGlobal Enum + MultiBlock MultiPeriod Parser Parser2 Reinit + RoundoffDomain) if (AMReX_PARTICLES) list(APPEND AMREX_TESTS_SUBDIRS Particles) diff --git a/Tests/DeviceGlobal/CMakeLists.txt b/Tests/DeviceGlobal/CMakeLists.txt new file mode 100644 index 00000000000..990662d406b --- /dev/null +++ b/Tests/DeviceGlobal/CMakeLists.txt @@ -0,0 +1,15 @@ +if (( (AMReX_GPU_BACKEND STREQUAL "CUDA") OR + (AMReX_GPU_BACKEND STREQUAL "HIP" ) ) AND + (NOT AMReX_GPU_RDC)) + return() +endif () + +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp global_vars.cpp init.cpp work.cpp) + set(_input_files) + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/DeviceGlobal/GNUmakefile b/Tests/DeviceGlobal/GNUmakefile new file mode 100644 index 00000000000..fd5fbd8f2c0 --- /dev/null +++ b/Tests/DeviceGlobal/GNUmakefile @@ -0,0 +1,23 @@ +AMREX_HOME ?= ../../ + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_CUDA = TRUE +USE_HIP = FALSE +USE_SYCL = FALSE + +USE_MPI = FALSE +USE_OMP = FALSE + +BL_NO_FORT = TRUE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/DeviceGlobal/Make.package b/Tests/DeviceGlobal/Make.package new file mode 100644 index 00000000000..8df45d1f81b --- /dev/null +++ b/Tests/DeviceGlobal/Make.package @@ -0,0 +1,2 @@ +CEXE_sources += main.cpp init.cpp work.cpp global_vars.cpp + diff --git a/Tests/DeviceGlobal/global_vars.H b/Tests/DeviceGlobal/global_vars.H new file mode 100644 index 00000000000..88ce1f0c4fc --- /dev/null +++ b/Tests/DeviceGlobal/global_vars.H @@ -0,0 +1,7 @@ +#pragma once + +#include +#include + +extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, dg_x); +extern AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, 4, dg_y); diff --git a/Tests/DeviceGlobal/global_vars.cpp b/Tests/DeviceGlobal/global_vars.cpp new file mode 100644 index 00000000000..485f41f164c --- /dev/null +++ 
b/Tests/DeviceGlobal/global_vars.cpp @@ -0,0 +1,7 @@ + +#include "global_vars.H" + +// definitions of global variables + +AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, dg_x); +AMREX_DEVICE_GLOBAL_VARIABLE(amrex::Long, 4, dg_y); diff --git a/Tests/DeviceGlobal/init.cpp b/Tests/DeviceGlobal/init.cpp new file mode 100644 index 00000000000..2850941f1ee --- /dev/null +++ b/Tests/DeviceGlobal/init.cpp @@ -0,0 +1,27 @@ + +#include "global_vars.H" + +void init () +{ + amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) + { + dg_x = 1; + for (int n = 0; n < 4; ++n) { + dg_y[n] = 100 + n; + } + }); + + amrex::Gpu::streamSynchronize(); +} + +void init2 () +{ + amrex::Gpu::PinnedVector pv{2,200,201,202,203}; + amrex::Gpu::memcpy_from_host_to_device_global_async + (dg_x, pv.data(), sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_host_to_device_global_async + (dg_y, pv.data()+1, sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_host_to_device_global_async + (dg_y, pv.data()+2, sizeof(amrex::Long)*3, sizeof(amrex::Long)); + amrex::Gpu::streamSynchronize(); +} diff --git a/Tests/DeviceGlobal/main.cpp b/Tests/DeviceGlobal/main.cpp new file mode 100644 index 00000000000..b3b67784722 --- /dev/null +++ b/Tests/DeviceGlobal/main.cpp @@ -0,0 +1,20 @@ +#include +#include + +void init(); +void work(); +void init2(); +void work2(); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + { + init(); + work(); + + init2(); + work2(); + } + amrex::Finalize(); +} diff --git a/Tests/DeviceGlobal/work.cpp b/Tests/DeviceGlobal/work.cpp new file mode 100644 index 00000000000..8350dad0661 --- /dev/null +++ b/Tests/DeviceGlobal/work.cpp @@ -0,0 +1,40 @@ + +#include "global_vars.H" + +void work () +{ + amrex::Gpu::PinnedVector pv; + pv.resize(5,0); + auto* p = pv.data(); + amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) + { + p[0] = dg_x; + for (int n = 0; n < 4; ++n) { + p[1+n] = dg_y[n]; + } + }); + amrex::Gpu::streamSynchronize(); + AMREX_ALWAYS_ASSERT(pv[0] == 1 && + pv[1] == 100 && + pv[2] == 101 && + pv[3] == 102 && + pv[4] == 103); +} + +void work2 () +{ + amrex::Gpu::PinnedVector pv; + pv.resize(5,0); + amrex::Gpu::memcpy_from_device_global_to_host_async + (pv.data(), dg_x, sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_device_global_to_host_async + (pv.data()+1, dg_y, sizeof(amrex::Long)); + amrex::Gpu::memcpy_from_device_global_to_host_async + (pv.data()+2, dg_y, sizeof(amrex::Long)*3, sizeof(amrex::Long)); + amrex::Gpu::streamSynchronize(); + AMREX_ALWAYS_ASSERT(pv[0] == 2 && + pv[1] == 200 && + pv[2] == 201 && + pv[3] == 202 && + pv[4] == 203); +} diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index 7358946c434..857461a1f64 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -94,11 +94,14 @@ void CoarsenToFine(MultiFab& div_refined_coarse, Real MFdiff(const MultiFab& lhs, const MultiFab& rhs, - int strt_comp, int num_comp, int nghost, const std::string name = "") + int strt_comp, int num_comp, int nghost, const std::string name = "", + bool relative = false) { MultiFab temp(lhs.boxArray(), lhs.DistributionMap(), lhs.nComp(), nghost); Copy(temp, lhs, strt_comp, strt_comp, num_comp, nghost); temp.minus(rhs, strt_comp, num_comp, nghost); + if (relative) { + temp.divide(rhs, strt_comp, num_comp, nghost); } if (name != "") { amrex::VisMF::Write(temp, std::string("pltfiles/" + name)); } @@ -301,7 +304,7 @@ void main_main () // Setup initial value on the coarse faces. for (int i=0; i max_i) ? 
max_diff : max_i; + amrex::Print() << " Fine valid region maximum change, comp "< +#include +#include + +using namespace amrex; + +AMREX_ENUM(MyColor, red, green, blue ); + +namespace my_namespace { + AMREX_ENUM(MyColor, orange, yellow,cyan ); +} + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc, argv); + { + auto const& names = amrex::getEnumNameStrings<MyColor>(); + auto const& names2 = amrex::getEnumNameStrings<my_namespace::MyColor>(); + amrex::Print() << "colors:"; + for (auto const& name : names) { + amrex::Print() << " " << name; + } + amrex::Print() << "\n"; + amrex::Print() << "colors:"; + for (auto const& name : names2) { + amrex::Print() << " " << name; + } + amrex::Print() << "\n"; + + ParmParse pp; + { + auto color = static_cast<MyColor>(999); + pp.query("color1", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::red); + } + { + auto color = static_cast<MyColor>(999); + pp.get("color2", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::green); + } + { + auto color = static_cast<MyColor>(999); + pp.get("color3", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::blue); + } + { + auto color = static_cast<MyColor>(999); + try { + pp.query("color4", color); + } catch (std::runtime_error const& e) { + amrex::Print() << "As expected, " << e.what() << '\n'; + } + AMREX_ALWAYS_ASSERT(color == static_cast<MyColor>(999)); + try { + pp.get_enum_case_insensitive("color4", color); + } catch (std::runtime_error const& e) { + amrex::Print() << "As expected, " << e.what() << '\n'; + } + AMREX_ALWAYS_ASSERT(color == static_cast<MyColor>(999)); + } + { + auto color = static_cast<MyColor>(999); + try { + pp.query("color5", color); + } catch (std::runtime_error const& e) { + amrex::Print() << "As expected, " << e.what() << '\n'; + } + AMREX_ALWAYS_ASSERT(color == static_cast<MyColor>(999)); + pp.query_enum_case_insensitive("color5", color); + amrex::Print() << "color = " << amrex::getEnumNameString(color) << '\n'; + AMREX_ALWAYS_ASSERT(color == MyColor::blue); + } + { + std::vector<my_namespace::MyColor> color; + pp.getarr("colors", color); + AMREX_ALWAYS_ASSERT(color.size() == 3 && + color[0] == my_namespace::MyColor::cyan && + color[1] == my_namespace::MyColor::yellow && + color[2] == my_namespace::MyColor::orange); + std::vector<my_namespace::MyColor> color2; + pp.queryarr("colors", color2); + AMREX_ALWAYS_ASSERT(color.size() == 3 && + color == color2 && + color[0] == my_namespace::MyColor::cyan && + color[1] == my_namespace::MyColor::yellow && + color[2] == my_namespace::MyColor::orange); + amrex::Print() << "colors:"; + for (auto const& c : color) { + amrex::Print() << " " << amrex::getEnumNameString(c); + } + amrex::Print() << "\n"; + } + } + + amrex::Finalize(); +} diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H index 5834e9dc624..eb93bf2836c 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H @@ -61,6 +61,7 @@ private: bool semicoarsening = false; int max_coarsening_level = 30; int max_semicoarsening_level = 0; + bool use_gauss_seidel = true; // true: red-black, false: jacobi bool use_hypre = false; bool use_petsc = false; diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp index 27cd0b7a4b4..7a7647ce93a 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp @@ -62,9 +62,10 @@ 
MyTest::solvePoisson () if (composite_solve) { - MLPoisson mlpoisson(geom, grids, dmap, info); + mlpoisson.setGaussSeidel(use_gauss_seidel); + mlpoisson.setMaxOrder(linop_maxorder); // This is a 3d problem with Dirichlet BC @@ -105,6 +106,8 @@ MyTest::solvePoisson () { MLPoisson mlpoisson({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info); + mlpoisson.setGaussSeidel(use_gauss_seidel); + mlpoisson.setMaxOrder(linop_maxorder); // This is a 3d problem with Dirichlet BC @@ -163,6 +166,8 @@ MyTest::solveABecLaplacian () MLABecLaplacian mlabec(geom, grids, dmap, info); + mlabec.setGaussSeidel(use_gauss_seidel); + mlabec.setMaxOrder(linop_maxorder); mlabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, @@ -220,6 +225,8 @@ MyTest::solveABecLaplacian () { MLABecLaplacian mlabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info); + mlabec.setGaussSeidel(use_gauss_seidel); + mlabec.setMaxOrder(linop_maxorder); mlabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, @@ -290,6 +297,8 @@ MyTest::solveABecLaplacianInhomNeumann () MLABecLaplacian mlabec(geom, grids, dmap, info); + mlabec.setGaussSeidel(use_gauss_seidel); + mlabec.setMaxOrder(linop_maxorder); // This is a 3d problem with inhomogeneous Neumann BC @@ -349,6 +358,8 @@ MyTest::solveABecLaplacianInhomNeumann () { MLABecLaplacian mlabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info); + mlabec.setGaussSeidel(use_gauss_seidel); + mlabec.setMaxOrder(linop_maxorder); // This is a 3d problem with inhomogeneous Neumann BC @@ -547,6 +558,8 @@ MyTest::readParameters () pp.query("max_coarsening_level", max_coarsening_level); pp.query("max_semicoarsening_level", max_semicoarsening_level); + pp.query("use_gauss_seidel", use_gauss_seidel); + pp.query("use_gmres", use_gmres); AMREX_ALWAYS_ASSERT(use_gmres == false || prob_type == 2); diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.H b/Tests/LinearSolvers/CurlCurl/MyTest.H index e665b59850c..036db35e05c 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.H +++ b/Tests/LinearSolvers/CurlCurl/MyTest.H @@ -30,6 +30,7 @@ private: bool consolidation = true; int max_coarsening_level = 30; + bool use_pcg = false; bool use_gmres = false; bool gmres_use_precond = true; int gmres_precond_niters = 1; diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.cpp b/Tests/LinearSolvers/CurlCurl/MyTest.cpp index aba1536b1ae..1093e8c8deb 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.cpp +++ b/Tests/LinearSolvers/CurlCurl/MyTest.cpp @@ -46,6 +46,8 @@ MyTest::solve () } mlcc.prepareRHS({&rhs}); + if (use_pcg) { mlcc.setUsePCG(true); } + using V = Array; MLMGT mlmg(mlcc); mlmg.setMaxIter(max_iter); @@ -105,6 +107,7 @@ MyTest::readParameters () pp.query("consolidation", consolidation); pp.query("max_coarsening_level", max_coarsening_level); + pp.query("use_pcg", use_pcg); pp.query("use_gmres", use_gmres); pp.query("gmres_use_precond", gmres_use_precond); pp.query("gmres_precond_niters", gmres_precond_niters); diff --git a/Tests/MultiPeriod/CMakeLists.txt b/Tests/MultiPeriod/CMakeLists.txt new file mode 100644 index 00000000000..d5760b11dce --- /dev/null +++ b/Tests/MultiPeriod/CMakeLists.txt @@ -0,0 +1,9 @@ +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp) + set(_input_files ) + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/MultiPeriod/GNUmakefile b/Tests/MultiPeriod/GNUmakefile new file mode 100644 index 00000000000..d0d895ff522 --- /dev/null +++ b/Tests/MultiPeriod/GNUmakefile @@ -0,0 +1,24 @@ +AMREX_HOME := ../.. 
+ +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_MPI = FALSE +USE_OMP = FALSE +USE_CUDA = FALSE +USE_HIP = FALSE +USE_SYCL = FALSE + +BL_NO_FORT = TRUE + +TINY_PROFILE = FALSE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/MultiPeriod/Make.package b/Tests/MultiPeriod/Make.package new file mode 100644 index 00000000000..6b4b865e8fc --- /dev/null +++ b/Tests/MultiPeriod/Make.package @@ -0,0 +1 @@ +CEXE_sources += main.cpp diff --git a/Tests/MultiPeriod/main.cpp b/Tests/MultiPeriod/main.cpp new file mode 100644 index 00000000000..89075c96b79 --- /dev/null +++ b/Tests/MultiPeriod/main.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include + +using namespace amrex; + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc, argv); + { + // Domain size: 2 x 128 x 4 + Box box(IntVect(0), IntVect(AMREX_D_DECL(1, 127, 3))); + Array is_periodic{AMREX_D_DECL(1,1,1)}; + Geometry geom(box, RealBox(AMREX_D_DECL(Real(0),Real(0),Real(0)), + AMREX_D_DECL(Real(1),Real(1),Real(1))), + CoordSys::cartesian, is_periodic); + BoxArray ba(box); + ba.maxSize(32); + ba.convert(IntVect(AMREX_D_DECL(1,0,0))); // nodal in x-direction + DistributionMapping dm(ba); + + FabArray> mf1(ba,dm,1,IntVect(4)); + FabArray> mf2(ba,dm,1,IntVect(5)); + + mf1.setVal(-1); + mf2.setVal(-2); + + auto const& len = geom.Domain().length3d(); + auto expected = [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + while (i < 0 ) { i += len[0]; } + while (i >= len[0]) { i -= len[0]; } + while (j < 0 ) { j += len[1]; } + while (j >= len[1]) { j -= len[1]; } + while (k < 0 ) { k += len[2]; } + while (k >= len[2]) { k -= len[2]; } + return Long(i) + Long(j)*Long(len[0]) + Long(k)*Long(len[0])*Long(len[1]); + }; + + auto const& ma1 = mf1.arrays(); + auto const& ma2 = mf2.arrays(); + + // Initialize valid region + ParallelFor(mf1, IntVect(0), [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + ma1[b](i,j,k) = expected(i,j,k); + }); + + mf1.FillBoundary(geom.periodicity()); + mf2.ParallelCopy(mf1, 0, 0, 1, IntVect(0), mf2.nGrowVect(), geom.periodicity()); + + auto r1 = ParReduce(TypeList{}, TypeList{}, mf1, mf1.nGrowVect(), + [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) -> GpuTuple + { + return { Long(expected(i,j,k) != ma1[b](i,j,k)) }; + }); + auto r2 = ParReduce(TypeList{}, TypeList{}, mf2, mf2.nGrowVect(), + [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) -> GpuTuple + { + return { Long(expected(i,j,k) != ma2[b](i,j,k)) }; + }); + + AMREX_ALWAYS_ASSERT(r1 == 0); + AMREX_ALWAYS_ASSERT(r2 == 0); + + if (r1 == 0 && r2 == 0) { + amrex::Print() << "SUCCESS\n"; + } + } + amrex::Finalize(); +} diff --git a/Tests/Parser/main.cpp b/Tests/Parser/main.cpp index 167c0a54a31..df82ba2f3f0 100644 --- a/Tests/Parser/main.cpp +++ b/Tests/Parser/main.cpp @@ -373,7 +373,7 @@ int main (int argc, char* argv[]) int count = 0; int x = 11; { - auto f = [&] (std::string const& s) -> int + auto f = [&] (std::string const& s) { amrex::Print() << count++ << ". Testing \"" << s << "\"\n"; IParser iparser(s); @@ -390,7 +390,7 @@ int main (int argc, char* argv[]) AMREX_ALWAYS_ASSERT(f("x/13/5") == ((x/13)/5)); AMREX_ALWAYS_ASSERT(f("13/x/5") == ((13/x)/5)); - auto g = [&] (std::string const& s, std::string const& c, int cv) -> int + auto g = [&] (std::string const& s, std::string const& c, int cv) { amrex::Print() << count++ << ". 
Testing \"" << s << "\"\n"; IParser iparser(s); @@ -408,7 +408,7 @@ int main (int argc, char* argv[]) AMREX_ALWAYS_ASSERT(g("x/b/5", "b", 13) == ((x/13)/5)); AMREX_ALWAYS_ASSERT(g("b/x/5", "b", 13) == ((13/x)/5)); - auto h = [&] (std::string const& s) -> int + auto h = [&] (std::string const& s) { amrex::Print() << count++ << ". Testing \"" << s << "\"\n"; IParser iparser(s); @@ -435,6 +435,31 @@ int main (int argc, char* argv[]) } } } + + AMREX_ALWAYS_ASSERT(h("123456789012345") == 123456789012345LL); + AMREX_ALWAYS_ASSERT(h("123456789012345.") == 123456789012345LL); + AMREX_ALWAYS_ASSERT(h("123'456'789'012'345") == 123456789012345LL); + AMREX_ALWAYS_ASSERT(h("1.23456789012345e14") == 123456789012345LL); + AMREX_ALWAYS_ASSERT(h("1.0E3") == 1000); + AMREX_ALWAYS_ASSERT(h("2**40") == 1024LL*1024LL*1024LL*1024LL); + + auto test_bad_number = [&] (std::string const& s) + { + amrex::Print() << count++ << ". Testing \"" << s << "\"\n"; + try { + IParser iparser(s); + auto exe = iparser.compileHost<0>(); + auto r = exe(); + amrex::ignore_unused(r); + return false; + } catch (std::runtime_error const& e) { + amrex::Print() << " Expected error: " << e.what() << '\n'; + return true; + } + }; + AMREX_ALWAYS_ASSERT(test_bad_number("1000000e-4")); + AMREX_ALWAYS_ASSERT(test_bad_number("1.234e2")); + AMREX_ALWAYS_ASSERT(test_bad_number("3.14")); } amrex::Print() << "\nAll IParser tests passed\n\n"; } diff --git a/Tests/Particles/NeighborParticles/Constants.H b/Tests/Particles/NeighborParticles/Constants.H index 18c94dbe255..78cfa3ca7d8 100644 --- a/Tests/Particles/NeighborParticles/Constants.H +++ b/Tests/Particles/NeighborParticles/Constants.H @@ -8,8 +8,8 @@ namespace Params // This is designed to represent MFiX-like conditions where the grid spacing is // roughly 2.5 times the particle diameter. 
In main.cpp we set grid spacing to 1 // so here we set cutoff to diameter = 1/2.5 --> cutoff = 0.2 - static constexpr amrex::Real cutoff = 0.2 ; - static constexpr amrex::Real min_r = 1.e-4; + static constexpr amrex::ParticleReal cutoff = 0.2 ; + static constexpr amrex::ParticleReal min_r = 1.e-4; } #endif diff --git a/Tests/Particles/NeighborParticles/MDParticleContainer.cpp b/Tests/Particles/NeighborParticles/MDParticleContainer.cpp index 21543c7aed8..0a2570f798c 100644 --- a/Tests/Particles/NeighborParticles/MDParticleContainer.cpp +++ b/Tests/Particles/NeighborParticles/MDParticleContainer.cpp @@ -1,8 +1,9 @@ +#include +#include + #include "MDParticleContainer.H" #include "Constants.H" - #include "CheckPair.H" -#include using namespace amrex; @@ -148,8 +149,9 @@ std::pair<Real, Real> MDParticleContainer::minAndMaxDistance() const int lev = 0; auto& plev = GetParticles(lev); - Real min_d = std::numeric_limits<Real>::max(); - Real max_d = std::numeric_limits<Real>::min(); + ReduceOps<ReduceOpMin, ReduceOpMax> reduce_op; + ReduceData<ParticleReal, ParticleReal> reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { @@ -159,41 +161,40 @@ std::pair<Real, Real> MDParticleContainer::minAndMaxDistance() auto& ptile = plev[index]; auto& aos = ptile.GetArrayOfStructs(); - const size_t np = aos.numParticles(); auto nbor_data = m_neighbor_list[lev][index].data(); ParticleType* pstruct = aos().dataPtr(); - Gpu::DeviceScalar<Real> min_d_gpu(min_d); - Gpu::DeviceScalar<Real> max_d_gpu(max_d); - - Real* pmin_d = min_d_gpu.dataPtr(); - Real* pmax_d = max_d_gpu.dataPtr(); - - AMREX_FOR_1D ( np, i, - { - ParticleType& p1 = pstruct[i]; + ParticleReal min_start = std::numeric_limits<ParticleReal>::max(); + ParticleReal max_start = std::numeric_limits<ParticleReal>::lowest(); - for (const auto& p2 : nbor_data.getNeighbors(i)) - { - AMREX_D_TERM(Real dx = p1.pos(0) - p2.pos(0);, - Real dy = p1.pos(1) - p2.pos(1);, - Real dz = p1.pos(2) - p2.pos(2);) + reduce_op.eval(aos.numParticles(), reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + ParticleType& p1 = pstruct[i]; - Real r2 = AMREX_D_TERM(dx*dx, + dy*dy, + dz*dz); - r2 = amrex::max(r2, Params::min_r*Params::min_r); - Real r = sqrt(r2); + ParticleReal min_d = min_start; + ParticleReal max_d = max_start; - Gpu::Atomic::Min(pmin_d, r); - Gpu::Atomic::Max(pmax_d, r); - } - }); + for (const auto& p2 : nbor_data.getNeighbors(i)) + { + AMREX_D_TERM(ParticleReal dx = p1.pos(0) - p2.pos(0);, + ParticleReal dy = p1.pos(1) - p2.pos(1);, + ParticleReal dz = p1.pos(2) - p2.pos(2);) - Gpu::Device::streamSynchronize(); + ParticleReal r2 = AMREX_D_TERM(dx*dx, + dy*dy, + dz*dz); + r2 = amrex::max(r2, Params::min_r*Params::min_r); + auto r = ParticleReal(std::sqrt(r2)); - min_d = std::min(min_d, min_d_gpu.dataValue()); - max_d = std::max(max_d, max_d_gpu.dataValue()); + min_d = std::min(min_d, r); + max_d = std::max(max_d, r); + } + return {min_d, max_d}; + }); } + + ParticleReal min_d = amrex::get<0>(reduce_data.value(reduce_op)); + ParticleReal max_d = amrex::get<1>(reduce_data.value(reduce_op)); ParallelDescriptor::ReduceRealMin(min_d, ParallelDescriptor::IOProcessorNumber()); ParallelDescriptor::ReduceRealMax(max_d, ParallelDescriptor::IOProcessorNumber()); diff --git a/Tools/CMake/AMReXConfig.cmake.in b/Tools/CMake/AMReXConfig.cmake.in index 7731e988e68..f5045b715cb 100644 --- a/Tools/CMake/AMReXConfig.cmake.in +++ b/Tools/CMake/AMReXConfig.cmake.in @@ -79,6 +79,7 @@ set(AMReX_PARTICLES_FOUND @AMReX_PARTICLES@) set(AMReX_P@AMReX_PARTICLES_PRECISION@_FOUND ON) set(AMReX_SENSEI_FOUND @AMReX_SENSEI@) 
set(AMReX_CONDUIT_FOUND @AMReX_CONDUIT@) +set(AMReX_CATALYST_FOUND @AMReX_CATALYST@) set(AMReX_ASCENT_FOUND @AMReX_ASCENT@) set(AMReX_HYPRE_FOUND @AMReX_HYPRE@) set(AMReX_PETSC_FOUND @AMReX_PETSC@) @@ -134,6 +135,7 @@ set(AMReX_PARTICLES_PRECISION @AMReX_PARTICLES_PRECISION@) set(AMReX_SENSEI @AMReX_SENSEI@) set(AMReX_NO_SENSEI_AMR_INST @AMReX_NO_SENSEI_AMR_INST@) set(AMReX_CONDUIT @AMReX_CONDUIT@) +set(AMReX_CATALYST @AMReX_CATALYST@) set(AMReX_ASCENT @AMReX_ASCENT@) set(AMReX_HYPRE @AMReX_HYPRE@) set(AMReX_PETSC @AMReX_PETSC@) @@ -202,6 +204,10 @@ if (@AMReX_ASCENT@) find_dependency(Ascent REQUIRED) endif () +if (@AMReX_CATALYST@) + find_dependency(Catalyst REQUIRED) +endif () + if (@AMReX_CONDUIT@) find_dependency(Conduit REQUIRED) endif () diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake index c99c3a63791..3e5d4c8bdb4 100644 --- a/Tools/CMake/AMReXOptions.cmake +++ b/Tools/CMake/AMReXOptions.cmake @@ -284,8 +284,7 @@ print_option(AMReX_FORTRAN_INTERFACES) option( AMReX_LINEAR_SOLVERS "Build AMReX Linear solvers" ON ) print_option( AMReX_LINEAR_SOLVERS ) -cmake_dependent_option( AMReX_AMRDATA "Build data services" OFF - "AMReX_FORTRAN" OFF ) +option( AMReX_AMRDATA "Build data services" OFF ) print_option( AMReX_AMRDATA ) option( AMReX_PARTICLES "Build particle classes" ON) @@ -321,6 +320,11 @@ print_option( AMReX_NO_SENSEI_AMR_INST ) option( AMReX_CONDUIT "Enable Conduit support" OFF ) print_option( AMReX_CONDUIT ) +# Catalyst +cmake_dependent_option( AMReX_CATALYST "Enable Catalyst support" OFF + "AMReX_CONDUIT" OFF ) +print_option( AMReX_CATALYST ) + # Ascent cmake_dependent_option( AMReX_ASCENT "Enable Ascent support" OFF "AMReX_CONDUIT" OFF ) diff --git a/Tools/CMake/AMReXSetDefines.cmake b/Tools/CMake/AMReXSetDefines.cmake index 783b1ab83d6..d271321bde4 100644 --- a/Tools/CMake/AMReXSetDefines.cmake +++ b/Tools/CMake/AMReXSetDefines.cmake @@ -148,6 +148,9 @@ add_amrex_define( AMREX_NO_SENSEI_AMR_INST NO_LEGACY IF AMReX_NO_SENSEI_AMR_INST # Conduit Support add_amrex_define( AMREX_USE_CONDUIT NO_LEGACY IF AMReX_CONDUIT ) +# Catalyst Support +add_amrex_define( AMREX_USE_CATALYST NO_LEGACY IF AMReX_CATALYST ) + # Ascent Support add_amrex_define( AMREX_USE_ASCENT NO_LEGACY IF AMReX_ASCENT ) diff --git a/Tools/CMake/AMReXThirdPartyLibraries.cmake b/Tools/CMake/AMReXThirdPartyLibraries.cmake index f8f49e9c478..abe62a2ebc9 100644 --- a/Tools/CMake/AMReXThirdPartyLibraries.cmake +++ b/Tools/CMake/AMReXThirdPartyLibraries.cmake @@ -90,6 +90,15 @@ if (AMReX_ASCENT) # Ascent will find conduit, so check for Ascent first endforeach() endif () +# +# Catalyst +# +if (AMReX_CATALYST) + find_package(Catalyst REQUIRED PATHS "$ENV{CATALYST_IMPLEMENTATION_PATHS}") + foreach(D IN LISTS AMReX_SPACEDIM) + target_link_libraries(amrex_${D}d PUBLIC catalyst::catalyst) + endforeach() +endif () # # Conduit diff --git a/Tools/CMake/AMReX_Config_ND.H.in b/Tools/CMake/AMReX_Config_ND.H.in index 07df0fb53cb..3296a403ff0 100644 --- a/Tools/CMake/AMReX_Config_ND.H.in +++ b/Tools/CMake/AMReX_Config_ND.H.in @@ -42,6 +42,7 @@ #cmakedefine AMREX_USE_SENSEI_INSITU #cmakedefine AMREX_NO_SENSEI_AMR_INST #cmakedefine AMREX_USE_CONDUIT +#cmakedefine AMREX_USE_CATALYST #cmakedefine AMREX_USE_ASCENT #cmakedefine AMREX_USE_EB #cmakedefine AMREX_USE_CUDA diff --git a/Tools/C_util/Convergence/Make.package b/Tools/C_util/Convergence/Make.package index 06127427ca0..95fa155816a 100644 --- a/Tools/C_util/Convergence/Make.package +++ b/Tools/C_util/Convergence/Make.package @@ -4,7 +4,7 @@ CEXE_sources += 
AMReX_DataServices.cpp AMReX_AmrData.cpp CEXE_sources += ComputeAmrDataNorms.cpp CEXE_headers += ComputeAmrDataNorms.H -FEXE_sources += AMReX_FABUTIL_$(DIM)D.F AMReX_AVGDOWN_$(DIM)D.F +FEXE_sources += AMReX_AVGDOWN_$(DIM)D.F #ifeq ($(USE_ARRAYVIEW),TRUE) # CEXE_sources += DatasetClient.cpp diff --git a/Tools/GNUMake/Make.defs b/Tools/GNUMake/Make.defs index a1a2aa105b0..6a1c51c9292 100644 --- a/Tools/GNUMake/Make.defs +++ b/Tools/GNUMake/Make.defs @@ -216,6 +216,12 @@ else USE_CONDUIT := FALSE endif +ifdef USE_CATALYST + USE_CATALYST := $(strip $(USE_CATALYST)) +else + USE_CATALYST := FALSE +endif + ifdef USE_ASCENT USE_ASCENT := $(strip $(USE_ASCENT)) else @@ -1057,6 +1063,11 @@ ifeq ($(USE_CONDUIT),TRUE) include $(AMREX_HOME)/Tools/GNUMake/packages/Make.conduit endif +ifeq ($(USE_CATALYST),TRUE) + $(info Loading $(AMREX_HOME)/Tools/GNUMake/packages/Make.catalyst...) + include $(AMREX_HOME)/Tools/GNUMake/packages/Make.catalyst +endif + ifeq ($(USE_ASCENT),TRUE) $(info Loading $(AMREX_HOME)/Tools/GNUMake/packages/Make.ascent...) include $(AMREX_HOME)/Tools/GNUMake/packages/Make.ascent diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak index 3bcf5cb4372..4e9a7e4652e 100644 --- a/Tools/GNUMake/comps/dpcpp.mak +++ b/Tools/GNUMake/comps/dpcpp.mak @@ -45,6 +45,14 @@ ifeq ($(WARN_ALL),TRUE) warning_flags += -Wpedantic + # /tmp/icpx-2d34de0e47/global_vars-header-4390fb.h:25:36: error: zero size arrays are an extension [-Werror,-Wzero-length-array] + # 25 | const char* const kernel_names[] = { + # | ^ + # 1 error generated. + # + # Seen in oneapi 2024.2.0 after adding Test/DeviceGlobal + warning_flags += -Wno-zero-length-array + ifneq ($(WARN_SHADOW),FALSE) warning_flags += -Wshadow endif diff --git a/Tools/GNUMake/packages/Make.catalyst b/Tools/GNUMake/packages/Make.catalyst new file mode 100644 index 00000000000..810daa906f2 --- /dev/null +++ b/Tools/GNUMake/packages/Make.catalyst @@ -0,0 +1,16 @@ +######################################################### +# Catalyst (https://gitlab.kitware.com/paraview/catalyst) Support +######################################################### + +CPPFLAGS += -DAMREX_USE_CATALYST + +ifdef CATALYST_DIR + INCLUDE_LOCATIONS += $(CATALYST_DIR)/include + VPATH_LOCATIONS += $(CATALYST_DIR)/include + LIBRARY_LOCATIONS += $(CATALYST_DIR)/lib + LIBRARIES += -Wl,-rpath,$(CATALYST_DIR)/lib + + LIBRARIES += -lcatalyst + +endif + diff --git a/Tools/Plotfile/fboxinfo.cpp b/Tools/Plotfile/fboxinfo.cpp index 6d84ab519c7..85886d6c0c5 100644 --- a/Tools/Plotfile/fboxinfo.cpp +++ b/Tools/Plotfile/fboxinfo.cpp @@ -6,18 +6,18 @@ using namespace amrex; namespace { -class BoxND +class BoxNDDynamic { public: - friend std::ostream& operator<< (std::ostream& os, const BoxND& b); - BoxND (Box const& b, int dim) : m_box(b), m_dim(dim) {} + friend std::ostream& operator<< (std::ostream& os, const BoxNDDynamic& b); + BoxNDDynamic (Box const& b, int dim) : m_box(b), m_dim(dim) {} private: Box m_box; int m_dim; }; std::ostream& -operator<< (std::ostream& os, const BoxND& b) +operator<< (std::ostream& os, const BoxNDDynamic& b) { if (b.m_dim == 1) { os << "(" @@ -175,10 +175,10 @@ void main_main() const BoxArray& ba = plotfile.boxArray(ilev); const Long nboxes = ba.size(); const Box prob_domain = plotfile.probDomain(ilev); - amrex::Print() << " " << BoxND(prob_domain,dim) + amrex::Print() << " " << BoxNDDynamic(prob_domain,dim) << " " << nboxes << "\n"; for (int ibox = 0; ibox < nboxes; ++ibox) { - amrex::Print() << " " << BoxND(ba[ibox],dim) << "\n"; + amrex::Print() 
<< " " << BoxNDDynamic(ba[ibox],dim) << "\n"; } } }