From 389e7feceef50de6edbebf48fb7174c4b42731be Mon Sep 17 00:00:00 2001 From: "Scott V. Luedtke" Date: Thu, 29 Feb 2024 10:17:10 -0700 Subject: [PATCH 1/3] Updated build and run details with more variables including CMake options --- src/util/boot.cc | 85 +++++++++++++++++++++++++++++++++++++++++++++--- src/vpic/vpic.cc | 9 +++-- 2 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/util/boot.cc b/src/util/boot.cc index 213a3491..b232609f 100644 --- a/src/util/boot.cc +++ b/src/util/boot.cc @@ -3,6 +3,7 @@ #include #include +#include "../vpic/kokkos_tuning.hpp" double _boot_timestamp = 0; @@ -81,10 +82,86 @@ boot_services( int * pargc, std::cout << "# VPIC Git Hash: " << GIT_REVISION << std::endl; std::cout << "# Built on: " << BUILD_TIMESTAMP << std::endl; - std::cout << "# MPI Ranks: " << _world_size << std::endl; - std::cout << "# Threads: " << thread.n_pipeline << std::endl; - - std::cout << "######### End Run Details ########" << std::endl; + std::cout << "## CMake options ##" << std::endl; +# if defined ENABLE_INTEGRATED_TESTS + std::cout << "# ENABLE_INTEGRATED_TESTS: ON" << std::endl; +# else + std::cout << "# ENABLE_INTEGRATED_TESTS: OFF" << std::endl; +#endif +# if defined ENABLE_UNIT_TESTS + std::cout << "# ENABLE_UNIT_TESTS: ON" << std::endl; +# else + std::cout << "# ENABLE_UNIT_TESTS: OFF" << std::endl; +#endif +# if defined USE_V4_ALTIVEC + std::cout << "# USE_V4_ALTIVEC: ON" << std::endl; +# else + std::cout << "# USE_V4_ALTIVEC: OFF" << std::endl; +#endif +# if defined USE_V4_PORTABLE + std::cout << "# USE_V4_PORTABLE: ON" << std::endl; +# else + std::cout << "# USE_V4_PORTABLE: OFF" << std::endl; +#endif +# if defined USE_V4_SSE + std::cout << "# USE_V4_SSE: ON" << std::endl; +# else + std::cout << "# USE_V4_SSE: OFF" << std::endl; +#endif +# if defined ENABLE_OPENSSL + std::cout << "# ENABLE_OPENSSL: ON" << std::endl; +# else + std::cout << "# ENABLE_OPENSSL: OFF" << std::endl; +#endif +# if defined ENABLE_KOKKOS_OPENMP + std::cout << 
"# ENABLE_KOKKOS_OPENMP: ON" << std::endl; +# else + std::cout << "# ENABLE_KOKKOS_OPENMP: OFF" << std::endl; +#endif +# if defined ENABLE_KOKKOS_CUDA + std::cout << "# ENABLE_KOKKOS_CUDA: ON" << std::endl; +# else + std::cout << "# ENABLE_KOKKOS_CUDA: OFF" << std::endl; +#endif +# if defined BUILD_INTERNAL_KOKKOS + std::cout << "# BUILD_INTERNAL_KOKKOS: ON" << std::endl; +# else + std::cout << "# BUILD_INTERNAL_KOKKOS: OFF" << std::endl; +#endif +# if defined VPIC_DUMP_ENERGIES + std::cout << "# VPIC_DUMP_ENERGIES: ON" << std::endl; +# else + std::cout << "# VPIC_DUMP_ENERGIES: OFF" << std::endl; +#endif +# if defined VPIC_ENABLE_AUTO_TUNING + std::cout << "# VPIC_ENABLE_AUTO_TUNING: ON" << std::endl; +# else + std::cout << "# VPIC_ENABLE_AUTO_TUNING: OFF" << std::endl; +#endif + std::cout << "# NOTE: Auto tuning may have overwritten what CMake set these as." << std::endl; +# if defined VPIC_ENABLE_HIERARCHICAL + std::cout << "# VPIC_ENABLE_HIERARCHICAL: ON" << std::endl; +# else + std::cout << "# VPIC_ENABLE_HIERARCHICAL: OFF" << std::endl; +#endif +# if defined VPIC_ENABLE_TEAM_REDUCTION + std::cout << "# VPIC_ENABLE_TEAM_REDUCTION: ON" << std::endl; +# else + std::cout << "# VPIC_ENABLE_TEAM_REDUCTION: OFF" << std::endl; +#endif +# if defined VPIC_ENABLE_VECTORIZATION + std::cout << "# VPIC_ENABLE_VECTORIZATION: ON" << std::endl; +# else + std::cout << "# VPIC_ENABLE_VECTORIZATION: OFF" << std::endl; +#endif +# if defined VPIC_ENABLE_ACCUMULATOR + std::cout << "# VPIC_ENABLE_ACCUMULATOR: ON" << std::endl; +# else + std::cout << "# VPIC_ENABLE_ACCUMULATOR: OFF" << std::endl; +#endif + std::cout << "# Using sort method: " << EXPAND_AND_STRINGIFY(SORT) << std::endl; + + std::cout << "######### End Build Details ########" << std::endl; std::cout << std::endl; // blank line } diff --git a/src/vpic/vpic.cc b/src/vpic/vpic.cc index b7fe070c..94c995b1 100644 --- a/src/vpic/vpic.cc +++ b/src/vpic/vpic.cc @@ -122,19 +122,22 @@ void vpic_simulation::print_run_details() // 
physics focused params: // num steps, nx, ny, nz, num particles per species std::cout << "######### Run Details ##########" << std::endl; + std::cout << "# MPI Ranks: " << _world_size << std::endl; + std::cout << "# Threads: " << thread.n_pipeline << std::endl; std::cout << "## Global:" << std::endl; std::cout << " # Num Step " << num_step << std::endl; std::cout << " # px " << px << " py " << py << " pz " << pz << std::endl; std::cout << " # gnx " << px*grid->nx << " gny " << py*grid->ny << " gnz " << pz*grid->nz << std::endl; + std::cout << " # dx " << grid->dx << " dy " << grid->dy << " dz " << grid->dz << std::endl; + std::cout << " # dt " << grid->dt << " cvac " << grid->cvac << " eps0 " << grid->eps0 << std::endl; std::cout << "## Local:" << std::endl; std::cout << " # nx " << grid->nx << " ny " << grid->ny << " nz " << grid->nz << std::endl; - std::cout << " # dx " << grid->dx << " dy " << grid->dy << " dz " << grid->dz << std::endl; if (species_list ) { - std::cout << "## Particle Species: " << num_species( species_list ) << std::endl; + std::cout << "## Local Particle Species: " << num_species( species_list ) << std::endl; LIST_FOR_EACH( sp, species_list ) { - std::cout << " # " << sp->name << " np " << sp->np << std::endl; + std::cout << " # " << sp->name << " np " << sp->np << " max_np " << sp->max_np << std::endl; } } std::cout << "######### End Run Details ######" << std::endl; From c4b31f0b75eb9da0fbb8adf1655226428f32b8a0 Mon Sep 17 00:00:00 2001 From: "Scott V. 
Luedtke" Date: Tue, 16 Jul 2024 14:40:42 -0600 Subject: [PATCH 2/3] Minor updates to the documentation --- docs/source/comp.rst | 86 +++++++++++++++++++------------------------ docs/source/index.rst | 1 - docs/source/intro.rst | 2 +- docs/source/run.rst | 11 ------ docs/source/vis.rst | 2 +- 5 files changed, 39 insertions(+), 63 deletions(-) delete mode 100644 docs/source/run.rst diff --git a/docs/source/comp.rst b/docs/source/comp.rst index 68fafd1e..f8c306ae 100644 --- a/docs/source/comp.rst +++ b/docs/source/comp.rst @@ -6,26 +6,19 @@ Compiling VPIC consists of two steps: compiling or finding a Kokkos install, and Quickstart ********** +Compile Kokkos and VPIC +----------------------- -1) Do a *recursive* clone of this repo, this will pull down a copy of Kokkos -for you. -``` -git clone --recursive git@github.com:lanl/vpic-kokkos.git -``` -If you switch branches, you might need to update the Kokkos submodule. -``` -git submodule update --init -``` -2) Load modules for CMake, your compiler, MPI, and any platform specific packages like CUDA. -3) Find a file in `arch/` that is close to your intended system and modify as necessary. -Pay particular attention to lines like:: +1. Do a *recursive* clone of this repo, this will pull down a copy of Kokkos for you. ``git clone --recursive git@github.com:lanl/vpic-kokkos.git`` If you switch branches, you might need to update the Kokkos submodule. ``git submodule update --init`` +2. Load modules for CMake, your compiler, MPI, and any platform specific packages like CUDA. +3. Find a file in `arch/` that is close to your intended system and modify as necessary. Pay particular attention to lines like:: -DENABLE_KOKKOS_CUDA=ON -DKokkos_ARCH_VOLTA70=ON -DKokkos_ARCH_POWER9=ON -4) Make a build directory and run the arch file. -5) Type `make`. +4. Make a build directory and run the arch file. +5. Type ``make``. This should give you a simple working of the code, but be aware it does come with caveats. 
Most notably, one is expected to build and maintain a version of @@ -33,9 +26,17 @@ Kokkos (and optionally VPIC) per target device (one per GPU device, one per CPU platform, etc), where as the above builds for the specific platform that you're currently on. -A reminder to NVIDIA GPU users, CUDA 10.2 does not support GCC 8 or newer. We -recommend you use GCC 6 or 7. +Build a deck +------------ +Compiling VPIC creates a script `bin/vpic` that compiles decks. From the folder you want the executable to be written to, type ``$BUILD_PATH/bin/vpic MyDeck.cxx``. + +Run the executable +------------------ + +The executable can be run with a simple MPI command: ``mpirun -np $NUM_PROCS MyDeck.Linux``. Consider saving the stdout and stderr to a text file by appending ``2>&1 | tee out.txt``. + +Examine `sample/short_pulse.slurm` for an example submission script. We recommend using one MPI rank per CPU core or one rank per GPU as a baseline, but that may not work best for your simulation on your hardware. Manual Kokkos Install (more powerful, more effort) ************************************************** @@ -44,27 +45,18 @@ It is typical to maintain many different installs of Kokkos (CPU, older GPU, new GPU, Debug, etc), so it's worth while learning how to install Kokkos manually. To do this we will use cmake. On can achieve this by: -CPU: -``` -cmake -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_KNL=ON .. -``` +CPU: ``cmake -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_KNL=ON ..`` -GPU: -``` -cmake -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_VOLTA70=ON .. -``` +GPU: ``cmake -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_VOLTA70=ON ..`` Legacy non-cmake build for old Kokkos versions ********************************************** -1) Clone Kokkos (or use ./kokkos in the recursive clone) from -https://github.com/kokkos/kokkos -2) Make a build folder, and execute `../generate_makefile.bash`, passing the -appropriate options for platform and device architecture. These look something -like: +1. 
Clone Kokkos (or use ./kokkos in the recursive clone) from https://github.com/kokkos/kokkos +2. Make a build folder, and execute `../generate_makefile.bash`, passing the appropriate options for platform and device architecture. These look something like: - - CPU: `../generate_makefile.bash --with-serial --with-openmp --prefix=$KOKKOS_INSTALL_DIR` - - GPU: `../generate_makefile.bash --with-serial --with-openmp --with-cuda --arch=Kepler30 --with-cuda-options=enable_lambda --compiler=$KOKKOS_SRC_DIR/bin/nvcc_wrapper --prefix=$KOKKOS_INSTALL_DIR` + - CPU: ``../generate_makefile.bash --with-serial --with-openmp --prefix=$KOKKOS_INSTALL_DIR`` + - GPU: ``../generate_makefile.bash --with-serial --with-openmp --with-cuda --arch=Kepler30 --with-cuda-options=enable_lambda --compiler=$KOKKOS_SRC_DIR/bin/nvcc_wrapper --prefix=$KOKKOS_INSTALL_DIR`` Further Reading *************** @@ -82,32 +74,28 @@ options that are available. These include: 4. `VPIC_KOKKOS_DEBUG` 5. `KOKKOS_ARCH` -Building VPIC + Kokkos -********************** - -Then when we build VPIC we need to make sure we using the GPU, you need to -specify the Kokkos `nvcc_wrapper` to be the compiler. This typically looks -something like: - -`CXX=$HOME/tools/kokkos_gpu/bin/nvcc_wrapper cmake -DENABLE_KOKKOS_CUDA=ON -DCMAKE_BUILD_TYPE=Release ..` - -Note: As with any CMake application, it's critical that you set the -CMAKE_BUILD_TYPE (and other optimization flags) appropriately for your target -system - Optimization Options ******************** VPIC has compilation flags for enabling/disabling various optimizations. VPIC will automatically selct optimizations settings. Users can supply their own settings for potentially better performance. The optimization options are as follows: 1. `VPIC_ENABLE_AUTO_TUNING=ON` - - Control whether to use the automatically determined optimization settings or user supplied compile time flags. 
+ + Control whether to use the automatically determined optimization settings or user supplied compile time flags. + 2. `VPIC_ENABLE_HIERARCHICAL=OFF` - - Allow finer control over how work is distributed amoung threads. Automatically enabled by certain optimizations (Team reduction, Vectorization) that require explicit control over threads and vector lanes. Performance is highly dependent on how work is distributed. See kokkos_tuning.hpp for setting the number of leagues (thread teams) and team size (threads per team). + + Allow finer control over how work is distributed among threads. Automatically enabled by certain optimizations (Team reduction, Vectorization) that require explicit control over threads and vector lanes. Performance is highly dependent on how work is distributed. See kokkos_tuning.hpp for setting the number of leagues (thread teams) and team size (threads per team). + 3. `VPIC_ENABLE_TEAM_REDUCTION=OFF` - - Reduce number of atomic writes in the particle push. Checks if all the particles being processed by active threads / vector lanes belong to the same cell. If so, use fast register based methods to reduce current so that only 1 thread/lane needs to update the fields. + + Reduce number of atomic writes in the particle push. Checks if all the particles being processed by active threads / vector lanes belong to the same cell. If so, use fast register based methods to reduce current so that only 1 thread/lane needs to update the fields. + 4. `VPIC_ENABLE_VECTORIZATION=OFF` - - Enables vectorization with OpenMP SIMD for greater performance on the CPU + + Enables vectorization with OpenMP SIMD for greater performance on the CPU. + 5. `VPIC_ENABLE_ACCUMULATORS=OFF` - - Use an explicit accumulator for collecting current in advance_p. The accumulator results in better memory access patterns when writing current. This is useful on CPUs but not necessary on GPUs which have better random access characteristics. 
+ + Use an explicit accumulator for collecting current in advance_p. The accumulator results in better memory access patterns when writing current. This is useful on CPUs but not necessary on GPUs which have better random access characteristics. diff --git a/docs/source/index.rst b/docs/source/index.rst index 070ece35..cbb419ab 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -13,7 +13,6 @@ This documentation is very much a work in progress. Most documentation is still intro comp - run porting vis pbd diff --git a/docs/source/intro.rst b/docs/source/intro.rst index b65ff4a7..1af69b59 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -5,4 +5,4 @@ Welcome to the Kokkos version of the Vector Particle-in-Cell code, VPIC 2.0! VP Getting Started *************** -Getting started with VPIC will be much easier when the documentation is complete! For now, some build instructions are in :doc:`comp`, and some elementary run instructions are in :doc:`run`. +Getting started with VPIC will be much easier when the documentation is complete! For now, some quickstart instructions are in :doc:`comp`. diff --git a/docs/source/run.rst b/docs/source/run.rst deleted file mode 100644 index 6dcd26c0..00000000 --- a/docs/source/run.rst +++ /dev/null @@ -1,11 +0,0 @@ -Running VPIC -============ - -Running on multiple GPUs -************************ - -To run on multiple GPU's, you can pass the flag: `--kokkos-num-devices=N` (which replaced `--kokkos-ndevices`), where -`N` specifies the number of GPUs (per node). This works by VPIC passing through -options it doesn't understand to Kokkos, and thus VPIC will generate a warning -as it thinks you may have tried to tell it something it doesn't understand... 
- diff --git a/docs/source/vis.rst b/docs/source/vis.rst index b170acd5..f53632ac 100644 --- a/docs/source/vis.rst +++ b/docs/source/vis.rst @@ -1,4 +1,4 @@ -OAPostprocessing and Visualization +Postprocessing and Visualization ================================ At present, there is no standardized way to visualize or postprocess VPIC data. From 96b250d45a9d76d44fa3384a73e19f14364b9c12 Mon Sep 17 00:00:00 2001 From: "Scott V. Luedtke" Date: Tue, 16 Jul 2024 15:18:27 -0600 Subject: [PATCH 3/3] Added documentation on updating the documentation --- docs/source/dev.rst | 11 +++++++++++ docs/source/index.rst | 1 + 2 files changed, 12 insertions(+) create mode 100644 docs/source/dev.rst diff --git a/docs/source/dev.rst b/docs/source/dev.rst new file mode 100644 index 00000000..43a6536b --- /dev/null +++ b/docs/source/dev.rst @@ -0,0 +1,11 @@ +Developers +========== + +Updating GitHub Pages Documentation +*********************************** + +The online documentation comes from the branch gh-pages and is not automatically updated when devel or any other branch is updated. There are ways to automate this, but they seemed fragile, so for now some manual intervention is required every time the documentation is updated. + +First, update the documentation in the devel branch and compile the html. Next, in a separate repo, checkout the gh-pages branch. Copy all the compiled files (likely in `vpic-kokkos/docs/build/html/`) directly into the root of the gh-pages repo. Don't get cute and copy "only what's updated" because you're liable to mess up links and such. + +Open it up with a browser and make sure everything looks fine. Add all the files ``git add -A``, but watch out for any lingering submodules (Kokkos). Make a commit and finish it off with a ``git push origin gh-pages``. It should take immediate effect. If you don't see the updates or only some of them, your browser might be caching some of the pages. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index cbb419ab..6192042c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,6 +18,7 @@ This documentation is very much a work in progress. Most documentation is still pbd mcnplink issues + dev Indices and tables