diff --git a/.github/workflows/build-with-kokkos.yml b/.github/workflows/build-with-kokkos.yml index f9c90e17d..ad56034b1 100644 --- a/.github/workflows/build-with-kokkos.yml +++ b/.github/workflows/build-with-kokkos.yml @@ -71,12 +71,14 @@ jobs: exit -1 esac - - name: Install CMake, OpenMPI and dtrace + - name: Install git, CMake, OpenMPI, PAPI and dtrace run: | apt --yes --no-install-recommends install \ + git ca-certificates \ cmake make \ libopenmpi-dev \ - systemtap-sdt-dev + systemtap-sdt-dev \ + libpapi-dev - name: Compile and install Kokkos working-directory: kokkos run: | diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml index c49a6fdea..4e162b49f 100644 --- a/.github/workflows/clang-format-check.yml +++ b/.github/workflows/clang-format-check.yml @@ -1,13 +1,11 @@ -name: Clang-Format Check +name: Clang-Format check on: [push, pull_request] jobs: formatting-check: - name: Formatting Check runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Run clang-format style check for C/C++/Protobuf programs. - uses: jidicula/clang-format-action@v4.5.0 + - name: Run clang-format style check. 
+ uses: DoozyX/clang-format-lint-action@v0.16.2 with: - clang-format-version: '8' - check-path: './' + clangFormatVersion: 8 diff --git a/CMakeLists.txt b/CMakeLists.txt index 428402c1a..45299188b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,8 @@ option(KokkosTools_ENABLE_MPI "Enable MPI support" OFF) option(KokkosTools_ENABLE_CALIPER "Enable building Caliper library" OFF) option(KokkosTools_ENABLE_APEX "Enable building Apex library" OFF) option(KokkosTools_ENABLE_EXAMPLES "Build examples" OFF) +option(KokkosTools_ENABLE_TESTS "Build tests" OFF) + # Advanced settings option(KokkosTools_REUSE_KOKKOS_COMPILER "Set the compiler and flags based on installed Kokkos settings" OFF) mark_as_advanced(KokkosTools_REUSE_KOKKOS_COMPILER) @@ -95,7 +97,8 @@ endif() include(cmake/configure_variorum.cmake) set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER}) -set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available +set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available +set(KOKKOSTOOLS_HAS_ROCTX ${Kokkos_ENABLE_HIP}) # we assume that enabling HIP for Kokkos program means roctx should be available if(DEFINED ENV{VTUNE_HOME}) set(VTune_ROOT $ENV{VTUNE_HOME}) @@ -115,6 +118,9 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/profiling/all) set(COMMON_HEADERS_PATH ${CMAKE_CURRENT_BINARY_DIR}/common) include_directories(${COMMON_HEADERS_PATH}) +# Allow all tools to include any file. 
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common) + set(SINGLELIB_PROFILERS "" CACHE STRING "" FORCE) # Export settings @@ -263,6 +269,13 @@ if(KokkosTools_ENABLE_EXAMPLES) endif() endif() +# Tests +if(KokkosTools_ENABLE_TESTS) + enable_testing() + include(cmake/BuildGTest.cmake) + add_subdirectory(tests) +endif() + # Install exports install(TARGETS ${EXPORT_TARGETS} EXPORT ${EXPORT_NAME}) install(EXPORT ${EXPORT_NAME} diff --git a/CMakePresets.json b/CMakePresets.json index 34fd2665a..8d76097d8 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -9,7 +9,9 @@ "CMAKE_CXX_STANDARD" : "17", "KokkosTools_ENABLE_EXAMPLES" : "ON", "KokkosTools_ENABLE_SINGLE" : "ON", - "KokkosTools_ENABLE_MPI" : "ON" + "KokkosTools_ENABLE_MPI" : "ON", + "KokkosTools_ENABLE_PAPI" : "ON", + "KokkosTools_ENABLE_TESTS" : "ON" } }, { diff --git a/README.md b/README.md index 73f1a902a..44643fc9c 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,38 @@ Note: `Kokkos` must be configured with `Kokkos_ENABLE_LIBDL=ON` to load profilin ## General Usage -To use one of the tools you have to compile it, which will generate a dynamic library. Before executing the Kokkos application you then have to set the environment variable `KOKKOS_TOOLS_LIBS` to point to the dynamic library e.g. in the `bash` shell: -``` -export KOKKOS_TOOLS_LIBS=${HOME}/kokkos-tools/src/tools/memory-events/kp_memory_event.so -``` +To use one of the tools you have to compile it, which will generate a dynamic library. Before executing the Kokkos application you then have to set the environment variable `KOKKOS_TOOLS_LIBS` to point to the dynamic library. + +CMake and Makefiles are supported for building Kokkos Tools. The following provides instructions for both. + +## Using cmake + +### Build + +1. Create a build directory in Kokkos Tools, e.g., type `mkdir myBuild; cd myBuild` +2. To configure, type `cmake .. -DCMAKE_INSTALL_PREFIX=${YOUR_KOKKOS_TOOLS_INSTALL_DIR}`. 
There are more options but in most cases the defaults are sufficient. +3. To compile, type `make` +4. To install, type `make install` + +### Run + +Given your installed tool shared library `lib<name_of_tool_shared_library>.so` and an application executable called `yourApplication.exe`, type: + +`export KOKKOS_TOOLS_LIBS=${YOUR_KOKKOS_TOOLS_INSTALL_DIR}/lib<name_of_tool_shared_library>.so; ./yourApplication.exe` + + +## Using make + +### Build + +To build some library `<name_of_tool_shared_library>` with make, simply type `make` within that library's subdirectory `${YOUR_KOKKOS_TOOLS_LIB_SRC_DIR}` of Kokkos Tools. This generates the shared library within that subdirectory. + +### Run + +Given your installed tool shared library `<name_of_tool_shared_library>.so` and an application executable called `yourApplication.exe`, type: + +`export KOKKOS_TOOLS_LIBS=${YOUR_KOKKOS_TOOLS_LIB_SRC_DIR}/<name_of_tool_shared_library>.so; ./yourApplication.exe` -Many of the tools will produce an output file that uses the hostname as well as the process id as part of the filename. ## Explicit Instrumentation @@ -33,11 +59,14 @@ void foo() { The following provides an overview of the tools available in the set of Kokkos Tools. Click on each Kokkos Tools name to see more details about the tool via the Kokkos Tools Wiki. ### Utilities - + [**KernelFilter:**](https://github.com/kokkos/kokkos-tools/wiki/KernelFilter) A tool which is used in conjunction with analysis tools, to restrict them to a subset of the application. ++ [**KernelSampler:**](https://github.com/kokkos/kokkos-tools/wiki/KernelSampler) + + A tool to be used in conjunction with analysis tools to restrict the tooling to samples of Kokkos kernel invocations. + ### Memory Analysis + [**MemoryHighWater:**](https://github.com/kokkos/kokkos-tools/wiki/MemoryHighWater) @@ -85,30 +114,6 @@ The following provides an overview of the tools available in the set of Kokkos T accumulate statistics, and utilize various portable function calls for common needs w.r.t. timers, resource usage, etc. -# Building Kokkos Tools - -Use either CMake or Makefile to build Kokkos Tools. 
- -## Using cmake - -1. create a build directory in Kokkos Tools, e.g., type `mkdir myBuild; cd myBuild` -2. To configure the Type `ccmake ..` for any options you would like to enable/disable. -3. To compile, type `make` -4. To install, type `make install` - -## Using make - -To build with make, simply type `make` within each subdirectory of Kokkos Tools. - - -Building using `make` is currently recommended. Eventually, the preferred method of building will be `cmake`. - -# Running a Kokkos-based Application with a tool - -Given your tool shared library `.so` (which contains kokkos profiling callback functions) and an application executable called yourApplication.exe, type: - -`export KOKKOS_TOOLS_LIBS=${YOUR_KOKKOS_TOOLS_DIR}/; ./yourApplication.exe` - # Tutorial A tutorial on Kokkos Tools can be found here: https://github.com/kokkos/kokkos-tutorials/blob/main/LectureSeries/KokkosTutorial_07_Tools.pdf diff --git a/build-all.sh b/build-all.sh index 5cd77240a..993c2b011 100644 --- a/build-all.sh +++ b/build-all.sh @@ -9,10 +9,9 @@ make -f $ROOT_DIR/profiling/memory-events/Makefile make -f $ROOT_DIR/profiling/memory-hwm/Makefile make -f $ROOT_DIR/profiling/memory-hwm-mpi/Makefile make -f $ROOT_DIR/profiling/memory-usage/Makefile -make -f $ROOT_DIR/profiling/nvprof-connector/Makefile -make -f $ROOT_DIR/profiling/nvprof-focused-connector/Makefile +make -f $ROOT_DIR/profiling/nvtx-connector/Makefile +make -f $ROOT_DIR/profiling/nvtx-focused-connector/Makefile make -f $ROOT_DIR/profiling/papi-connector/Makefile -make -f $ROOT_DIR/profiling/simple-kernel-timer-json/Makefile make -f $ROOT_DIR/profiling/simple-kernel-timer/Makefile make -f $ROOT_DIR/profiling/space-time-stack/Makefile make -f $ROOT_DIR/profiling/systemtap-connector/Makefile diff --git a/cmake/BuildGTest.cmake b/cmake/BuildGTest.cmake new file mode 100644 index 000000000..0d16ff721 --- /dev/null +++ b/cmake/BuildGTest.cmake @@ -0,0 +1,35 @@ +# Look for Google Test and enable it as a target. 
+# +# The main targets that will be available are: +# * GTest::gtest +# * GTest::gmock +# +# References: +# * https://github.com/google/googletest +# * https://matgomes.com/integrate-google-test-into-cmake/ +# * https://google.github.io/googletest/quickstart-cmake.html +# * https://jeremimucha.com/2021/04/cmake-fetchcontent/ + +include(FetchContent) + +# Declare the Google Test dependency +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.14.0 +) + +# If not yet populated, add Google Test to the build with the following options: +# * disable installation of Google Test +# * enable GMock +# Note that we could have used FetchContent_MakeAvailable instead, but it would then +# use the default configuration that would install Google Test. +FetchContent_GetProperties(googletest) +if (NOT googletest_POPULATED) + FetchContent_Populate(googletest) + + set(BUILD_GMOCK ON) + set(INSTALL_GTEST OFF) + + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) +endif() diff --git a/cmake/configure_variorum.cmake b/cmake/configure_variorum.cmake index b6280dc0a..4c45612e8 100644 --- a/cmake/configure_variorum.cmake +++ b/cmake/configure_variorum.cmake @@ -8,8 +8,9 @@ if(NOT DEFINED Variorum_ROOT) if(DEFINED ENV{VARIORUM_ROOT}) set(Variorum_ROOT $ENV{VARIORUM_ROOT}) set(MSG_NOTFOUND "check VARIORUM_ROOT environment variable ($ENV{VARIORUM_ROOT})") + else() + set(Variorum_ROOT ${VARIORUM_ROOT}) endif() - set(Variorum_ROOT ${VARIORUM_ROOT}) else() set(MSG_NOTFOUND "check Variorum_ROOT (${Variorum_ROOT})") endif() diff --git a/common/kernel-filter/CMakeLists.txt b/common/kernel-filter/CMakeLists.txt index efe6d7c6b..1bef9189f 100644 --- a/common/kernel-filter/CMakeLists.txt +++ b/common/kernel-filter/CMakeLists.txt @@ -1 +1 @@ -kp_add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp) +kp_add_library(kp_kernel_filter kp_kernel_filter.cpp) diff --git 
a/common/kokkos-sampler/CMakeLists.txt b/common/kokkos-sampler/CMakeLists.txt index 609ab5707..dd269a369 100644 --- a/common/kokkos-sampler/CMakeLists.txt +++ b/common/kokkos-sampler/CMakeLists.txt @@ -1 +1 @@ -add_library(kp_kokkos_sampler ${KOKKOSTOOLS_LIBRARY_MODE} kp_sampler_skip.cpp) +kp_add_library(kp_kokkos_sampler kp_sampler_skip.cpp) diff --git a/common/kokkos-sampler/Makefile b/common/kokkos-sampler/Makefile index 862cae8fa..ce5f56ca1 100644 --- a/common/kokkos-sampler/Makefile +++ b/common/kokkos-sampler/Makefile @@ -1,4 +1,4 @@ -CXX = clang++ +CXX = g++ CXXFLAGS = -O3 -std=c++17 -g diff --git a/common/kokkos-sampler/README.md b/common/kokkos-sampler/README.md index 3d0c6393a..7ab95fc6a 100644 --- a/common/kokkos-sampler/README.md +++ b/common/kokkos-sampler/README.md @@ -1,4 +1,11 @@ -This is a sampler utility that is intended to complement other tools in the Kokkos Tools set. This utility allows for sampling (rather than collecting) of profiling or debugging data gathered from a particular tool of the Kokkos Tools set. The Kokkos Tools user provides a sampling rate via the environment variable KOKKOS_TOOLS_SAMPLER_SKIP. +This is a sampler utility that is intended to complement other tools in the Kokkos Tools set. This utility allows for sampling (rather than collecting) of profiling or debugging data gathered from a particular tool of the Kokkos Tools set. + +To use this utility, a Kokkos Tools user provides a sampling probability by setting the environment variable `KOKKOS_TOOLS_SAMPLER_PROB` to a positive real number between 0.0 and 100.0. The user can alternatively set a sampling skip rate, i.e., the number of Kokkos kernel invocations to skip before the next sample is taken. The user does so by setting the environment variable `KOKKOS_TOOLS_SAMPLER_SKIP` to a non-negative integer. 
+ +If both sampling probability and sampling skip rate are set by the user, this sampling utility only uses the sampling probability for sampling; the utility sets the sampling skip rate to 1, incorporating no pre-defined periodicity in sampling. If neither sampling probability nor the sampling skip rate are set by the user, then randomized sampling is done, with the sampler's probability being 10.0 percent. The sampler is periodic only if the sampling probability is not set by the user and the sampling skip rate is set by the user. + +For randomized sampling, the user can ensure reproducibility of this tool's output across multiple runs of a Kokkos application by setting `KOKKOS_TOOLS_RANDOM_SEED` to an integer value before all of the runs. If this environment variable is not set, the seed is based on the C time function. In order for the state of the sampled profiling and logging data in memory to be captured at the time of the utility's callback invocation, it might be important to enforce fences. However, this also means that there are more synchronization points compared with running the program without the tool. -This fencing behavior can be controlled by setting the environment variable `KOKKOS_TOOLS_GLOBALFENCES`. A non-zero value implies global fences on invocation of the tool. The default is not to introduce extra fences. +This fencing behavior can be controlled by setting the environment variable `KOKKOS_TOOLS_GLOBALFENCES`. A non-zero value implies global fences on invocation of the tool. The default is not to introduce extra fences. 
+ diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 9c53bcc5c..7c10ffcea 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -6,13 +6,18 @@ #include <dlfcn.h> #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" +#include <ctime> +#include <limits> +#include <iostream> namespace KokkosTools { namespace Sampler { static uint64_t uniqID = 0; -static uint64_t kernelSampleSkip = 101; +static uint64_t kernelSampleSkip = std::numeric_limits<uint64_t>::max(); +static double tool_prob_num = -1.0; static int tool_verbosity = 0; static int tool_globFence = 0; +static int tool_seed = -1; // a hash table mapping kID to nestedkID static std::unordered_map<uint64_t, uint64_t> infokIDSample; @@ -33,17 +38,51 @@ static endFunction endReduceCallee = NULL; void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { - if (0 == tool_globFence) { - settings->requires_global_fencing = false; + settings->requires_global_fencing = false; +} + +// set of functions from Kokkos ToolProgrammingInterface (includes fence) +Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs; + +uint32_t getDeviceID(uint32_t devid_in) { + int num_device_bits = 7; + int num_instance_bits = 17; + return (~((uint32_t(-1)) << num_device_bits)) & + (devid_in >> num_instance_bits); +} + +void invoke_ktools_fence(uint32_t devID) { + if (tpi_funcs.fence != nullptr) { + tpi_funcs.fence(devID); + if (tool_verbosity > 1) { + std::cout << "KokkosP: Sampler utility successfully invoked " + "tool-induced fence on device " + << getDeviceID(devID) << ".\n"; + } } else { - settings->requires_global_fencing = true; + std::cout << "KokkosP: FATAL: Kokkos Tools Programming Interface's " + "tool-invoked Fence is NULL!\n"; + std::abort(); + exit(-1); } } +void kokkosp_provide_tool_programming_interface( + uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface funcsFromTPI) { + if (!num_funcs) { + if (tool_verbosity > 0) + std::cout << 
"KokkosP: Note: Number of functions in Tools Programming " + "Interface is 0!\n"; + } + tpi_funcs = funcsFromTPI; +} + void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); + const char* tool_seed_str = getenv("KOKKOS_TOOLS_RANDOM_SEED"); + if (NULL != tool_verbose_str) { tool_verbosity = atoi(tool_verbose_str); } else { @@ -55,14 +94,19 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_globFence = 0; } + if (NULL != tool_seed_str) { + tool_seed = atoi(tool_seed_str); + } + char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); if (NULL == profileLibrary) { - printf( - "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a depreciated " - "variable. Please use KOKKOS_TOOLS_LIBS\n"); + std::cout << "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a " + "deprecated variable. 
Please use KOKKOS_TOOLS_LIBS\n"; + profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); if (NULL == profileLibrary) { - printf("KokkosP: No library to call in %s\n", profileLibrary); + std::cout << "KokkosP: FATAL: No library to call in KOKKOS_TOOLS_LIBS " + "or KOKKOS_PROFILE_LIBRARY!\n"; exit(-1); } } @@ -79,12 +123,13 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, nextLibrary = strtok(NULL, ";"); if (NULL == nextLibrary) { - printf("KokkosP: No child library to call in %s\n", profileLibrary); + std::cout << "KokkosP: FATAL: No child library to call in " + << profileLibrary << "!\n"; exit(-1); } else { if (tool_verbosity > 0) { - printf("KokkosP: Next library to call: %s\n", nextLibrary); - printf("KokkosP: Loading child library ..\n"); + std::cout << "KokkosP: Next library to call: " << nextLibrary << "\n"; + std::cout << "KokkosP: Loading child library of sampler..\n"; } void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL); @@ -119,19 +164,19 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } if (tool_verbosity > 0) { - printf("KokkosP: Function Status:\n"); - printf("KokkosP: begin-parallel-for: %s\n", - (beginForCallee == NULL) ? "no" : "yes"); - printf("KokkosP: begin-parallel-scan: %s\n", - (beginScanCallee == NULL) ? "no" : "yes"); - printf("KokkosP: begin-parallel-reduce: %s\n", - (beginReduceCallee == NULL) ? "no" : "yes"); - printf("KokkosP: end-parallel-for: %s\n", - (endForCallee == NULL) ? "no" : "yes"); - printf("KokkosP: end-parallel-scan: %s\n", - (endScanCallee == NULL) ? "no" : "yes"); - printf("KokkosP: end-parallel-reduce: %s\n", - (endReduceCallee == NULL) ? "no" : "yes"); + std::cout << "KokkosP: Function Status:\n"; + std::cout << "KokkosP: begin-parallel-for: " + << ((beginForCallee == NULL) ? "no" : "yes") << "\n"; + std::cout << "KokkosP: begin-parallel-scan: " + << ((beginScanCallee == NULL) ? 
"no" : "yes") << "\n"; + std::cout << "KokkosP: begin-parallel-reduce: " + << ((beginReduceCallee == NULL) ? "no" : "yes") << "\n"; + std::cout << "KokkosP: end-parallel-for: " + << ((endForCallee == NULL) ? "no" : "yes") << "\n"; + std::cout << "KokkosP: end-parallel-scan: " + << ((endScanCallee == NULL) ? "no" : "yes") << "\n"; + std::cout << "KokkosP: end-parallel-reduce: " + << ((endReduceCallee == NULL) ? "no" : "yes") << "\n"; } } } @@ -140,15 +185,78 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, uniqID = 1; + if (0 > tool_seed) { + srand(time(NULL)); + if (tool_verbosity > 0) { + std::cout << "KokkosP: Seeding random number generator using clock for " + "random sampling.\n"; + } + } else { + srand(tool_seed); + if (tool_verbosity > 0) { + std::cout << "KokkosP: Seeding random number generator using seed " + << tool_seed << " for random sampling.\n"; + } + } + + const char* tool_probability = getenv("KOKKOS_TOOLS_SAMPLER_PROB"); + + if (NULL != tool_probability) { + // Read sampling probability as a float between 0 and 100, representing + // a percentage that data should be gathered. + // Utility reasons about probability as a double between 0.0 and 1.0. + tool_prob_num = atof(tool_probability); + if (tool_prob_num > 100.0) { + std::cout << "KokkosP: The sampling probability value is set to be " + "greater than 100.0. The probability for the sampler will " + "be set to 100 percent; all of the invocations of a Kokkos " + "kernel will be profiled.\n"; + tool_prob_num = 100.0; + } else if (tool_prob_num < 0.0) { + std::cout + << "KokkosP: The sampling probability value is set to be a negative " + "number. 
The sampler's probability will be set to 0 percent; none " + "of the invocations of a Kokkos kernel will be profiled.\n"; + tool_prob_num = 0.0; + } + if (tool_verbosity > 0) { + std::cout << "KokkosP: Probability for the sampler set to: " + << tool_prob_num << "\n"; + } + kernelSampleSkip = 1; + } + const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); - if (NULL != tool_sample) { + if ((NULL != tool_sample) && (tool_prob_num == -1.0)) { + // If the user touched the sample skip rate variable + // and the tool probability is set to -1 (no probability sampling + // desired), then use only sampler skip rate. + tool_prob_num = 100.0; kernelSampleSkip = atoi(tool_sample) + 1; + if (tool_verbosity > 0) { + std::cout << "KokkosP: Sampling rate set to: " << tool_sample << "\n"; + } } - if (tool_verbosity > 0) { - printf("KokkosP: Sampling rate set to: %s\n", tool_sample); + if (tool_prob_num == -1.0) { + // If the tool probability is set to -1 (no probability sampling + // desired) and the user also didn't set + // skip rate, then use a default with a probability sampling of 10%. + + if (tool_verbosity > 0) { + std::cout << "KokkosP: Neither the probability nor the skip rate for " + "sampling were set...\n"; + } + tool_prob_num = 10.0; + kernelSampleSkip = 1; + if (tool_verbosity > 0) { + std::cout << "KokkosP: The probability for the sampler is set to the " + "default of " + << tool_prob_num + << " percent. 
The skip rate for sampler will not be used.\n"; + } } -} +} // end kokkosp_init_library void kokkosp_finalize_library() { if (NULL != finalizeProfileLibrary) (*finalizeProfileLibrary)(); @@ -160,14 +268,23 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, static uint64_t invocationNum = 0; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); - } - if (NULL != beginForCallee) { - uint64_t nestedkID = 0; - (*beginForCallee)(name, devID, &nestedkID); - infokIDSample.insert({*kID, nestedkID}); + if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) { + if (NULL != beginForCallee) { + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << *kID + << " calling child-begin function...\n"; + } + if (tool_globFence) { + invoke_ktools_fence(0); + } + uint64_t nestedkID = 0; + (*beginForCallee)(name, devID, &nestedkID); + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << *kID + << " finished with child-begin function.\n"; + } + infokIDSample.insert({*kID, nestedkID}); + } } } } @@ -177,10 +294,19 @@ void kokkosp_end_parallel_for(const uint64_t kID) { if (!(infokIDSample.find(kID) == infokIDSample.end())) { uint64_t retrievedNestedkID = infokIDSample[kID]; if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); + std::cout << "KokkosP: sample " << kID + << " calling child-end function...\n"; + } + + if (tool_globFence) { + invoke_ktools_fence(0); } (*endForCallee)(retrievedNestedkID); + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << kID + << " finished with child-end function.\n"; + } + infokIDSample.erase(kID); } } } @@ -191,14 +317,23 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, static uint64_t invocationNum = 0; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if 
(tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); - } - if (NULL != beginScanCallee) { - uint64_t nestedkID = 0; - (*beginScanCallee)(name, devID, &nestedkID); - infokIDSample.insert({*kID, nestedkID}); + if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) { + if (NULL != beginScanCallee) { + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << *kID + << " calling child-begin function...\n"; + } + uint64_t nestedkID = 0; + if (tool_globFence) { + invoke_ktools_fence(0); + } + (*beginScanCallee)(name, devID, &nestedkID); + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << *kID + << " finished with child-begin function.\n"; + } + infokIDSample.insert({*kID, nestedkID}); + } } } } @@ -208,10 +343,18 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { if (!(infokIDSample.find(kID) == infokIDSample.end())) { uint64_t retrievedNestedkID = infokIDSample[kID]; if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); + std::cout << "KokkosP: sample " << kID + << " calling child-end function...\n"; + } + if (tool_globFence) { + invoke_ktools_fence(0); } (*endScanCallee)(retrievedNestedkID); + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << kID + << " finished with child-end function.\n"; + } + infokIDSample.erase(kID); } } } @@ -222,28 +365,44 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, static uint64_t invocationNum = 0; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); - } - - if (NULL != beginReduceCallee) { - uint64_t nestedkID = 0; - (*beginReduceCallee)(name, devID, &nestedkID); - infokIDSample.insert({*kID, nestedkID}); + if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) { + if (NULL != beginReduceCallee) { + 
if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << *kID + << " calling child-begin function...\n"; + } + uint64_t nestedkID = 0; + if (tool_globFence) { + invoke_ktools_fence(0); + } + (*beginReduceCallee)(name, devID, &nestedkID); + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << *kID + << " finished with child-begin function.\n"; + } + infokIDSample.insert({*kID, nestedkID}); + } } } } void kokkosp_end_parallel_reduce(const uint64_t kID) { - if (NULL != endScanCallee) { + if (NULL != endReduceCallee) { if (!(infokIDSample.find(kID) == infokIDSample.end())) { uint64_t retrievedNestedkID = infokIDSample[kID]; if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); + std::cout << "KokkosP: sample " << kID + << " calling child-end function...\n"; } - (*endScanCallee)(retrievedNestedkID); + if (tool_globFence) { + invoke_ktools_fence(0); + } + (*endReduceCallee)(retrievedNestedkID); + if (tool_verbosity > 0) { + std::cout << "KokkosP: sample " << kID + << " finished with child-end function.\n"; + } + infokIDSample.erase(kID); } } } @@ -254,8 +413,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { extern "C" { namespace impl = KokkosTools::Sampler; - EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE( + impl::kokkosp_provide_tool_programming_interface) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) diff --git a/common/kp_config.hpp.in b/common/kp_config.hpp.in index 77c160870..09f2ad0d7 100644 --- a/common/kp_config.hpp.in +++ b/common/kp_config.hpp.in @@ -3,6 +3,7 @@ #define USE_MPI @KOKKOSTOOLS_HAS_MPI@ #cmakedefine KOKKOSTOOLS_HAS_NVTX +#cmakedefine KOKKOSTOOLS_HAS_ROCTX #cmakedefine KOKKOSTOOLS_HAS_CALIPER #cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP #cmakedefine KOKKOSTOOLS_HAS_VARIORUM diff --git 
a/common/utils/demangle.hpp b/common/utils/demangle.hpp new file mode 100644 index 000000000..662d8a6fc --- /dev/null +++ b/common/utils/demangle.hpp @@ -0,0 +1,74 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSTOOLS_COMMON_UTILS_DEMANGLE_HPP +#define KOKKOSTOOLS_COMMON_UTILS_DEMANGLE_HPP + +#include <string> + +#if defined(__GXX_ABI_VERSION) +#define HAVE_GCC_ABI_DEMANGLE +#endif + +#if defined(HAVE_GCC_ABI_DEMANGLE) +#include <cxxabi.h> +#endif // HAVE_GCC_ABI_DEMANGLE + +namespace KokkosTools { + +//! Demangle @p mangled_name; the view need not be NUL-terminated. +inline std::string demangleName(const std::string_view mangled_name) { +#if defined(HAVE_GCC_ABI_DEMANGLE) + int status = 0; + + char* demangled_name = abi::__cxa_demangle( + std::string(mangled_name).c_str(), nullptr, nullptr, &status); + + if (demangled_name) { + std::string ret(demangled_name); + std::free(demangled_name); + return ret; + } +#endif + return std::string(mangled_name); +} + +/** + * @brief Demangle @p mangled_name. + * + * This function supports @c Kokkos convention from + * @c Kokkos::Impl::ParallelConstructName. + * + * For instance, a kernel launched with a tag would appear as + * "<functor type>/<tag type>". + */ +inline std::string demangleNameKokkos(const std::string_view mangled_name) { + if (size_t pos = mangled_name.find('/', 0); + pos != std::string_view::npos && pos > 0) { + /// An explicit copy of the first part of the string is needed, because + /// @c abi::__cxa_demangle will parse the pointer up to its NULL terminator. 
+ return demangleName(std::string(mangled_name.substr(0, pos))) + .append("/") + .append( + demangleName(mangled_name.substr(pos + 1, mangled_name.size()))); + } else { + return demangleName(mangled_name); + } +} + +} // namespace KokkosTools + +#endif // KOKKOSTOOLS_COMMON_UTILS_DEMANGLE_HPP diff --git a/debugging/kernel-logger/kp_kernel_logger.cpp b/debugging/kernel-logger/kp_kernel_logger.cpp index bf7f585bb..dc5b13167 100644 --- a/debugging/kernel-logger/kp_kernel_logger.cpp +++ b/debugging/kernel-logger/kp_kernel_logger.cpp @@ -100,7 +100,7 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, printf(" %s\n", name); } -extern "C" void kokkospk_end_parallel_scan(const uint64_t kID) { +extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { printf("KokkosP: Execution of kernel %llu is completed.\n", (unsigned long long)(kID)); } diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index e7490dbcb..41980d163 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -21,7 +21,6 @@ endmacro() # and exported output in expected format, fail the test otherwise. 
if(NOT WIN32) add_kp_test(kernel_timer "kernel-timer") - add_kp_test(kernel_timer_json "kernel-timer-json") add_kp_test(memory_events "memory-events") add_kp_test(memory_usage "memory-usage") add_kp_test(chrome_tracing "chrome-tracing") @@ -48,3 +47,6 @@ if(KOKKOSTOOLS_HAS_NVTX) add_kp_test(nvtx_connector "nvtx-connector") add_kp_test(nvtx_focused_connector "nvtx-focused-connector") endif() +if(KOKKOSTOOLS_HAS_ROCTX) + add_kp_test(roctx_connector "roctx-connector") +endif() diff --git a/kokkos.presets.json b/kokkos.presets.json index 73470877c..46f8c2315 100644 --- a/kokkos.presets.json +++ b/kokkos.presets.json @@ -31,9 +31,10 @@ "name" : "ROCm", "inherits" : "default", "cacheVariables" : { - "Kokkos_ENABLE_HIP" : "ON", - "Kokkos_ARCH_VEGA906" : "ON", - "CMAKE_CXX_COMPILER" : "hipcc" + "Kokkos_ENABLE_HIP" : "ON", + "Kokkos_ENABLE_ROCTHRUST" : "OFF", + "Kokkos_ARCH_VEGA906" : "ON", + "CMAKE_CXX_COMPILER" : "hipcc" } } ], diff --git a/profiling/all/CMakeLists.txt b/profiling/all/CMakeLists.txt index ce8b13e27..32ef0a1f7 100644 --- a/profiling/all/CMakeLists.txt +++ b/profiling/all/CMakeLists.txt @@ -5,7 +5,7 @@ set(LIBNAME kokkostools) # return() #endif() -add_library(${LIBNAME} ${KOKKOSTOOLS_LIBRARY_MODE} kp_all.cpp) +add_library(${LIBNAME} kp_all.cpp) target_include_directories(${LIBNAME} PUBLIC $ @@ -19,4 +19,4 @@ endif() file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS kp_all.hpp "${COMMON_HEADERS_PATH}/*.hpp") install(FILES ${HEADER_FILES} DESTINATION ${EXPORT_INCLUDE_DIR}) -install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) \ No newline at end of file +install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) diff --git a/profiling/all/impl/Kokkos_Profiling_C_Interface.h b/profiling/all/impl/Kokkos_Profiling_C_Interface.h index 33eaa3920..8c3194e43 100644 --- a/profiling/all/impl/Kokkos_Profiling_C_Interface.h +++ b/profiling/all/impl/Kokkos_Profiling_C_Interface.h @@ -1,3 +1,4 @@ +/* //@HEADER // ************************************************************************ 
// @@ -9,10 +10,11 @@ // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. +// // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER +*/ #ifndef KOKKOS_PROFILING_C_INTERFACE_HPP #define KOKKOS_PROFILING_C_INTERFACE_HPP @@ -26,10 +28,14 @@ #include #endif -#define KOKKOSP_INTERFACE_VERSION 20210623 +#define KOKKOSP_INTERFACE_VERSION 20211015 // Profiling +#ifdef __cplusplus +extern "C" { +#endif + struct Kokkos_Profiling_KokkosPDeviceInfo { size_t deviceID; }; @@ -152,7 +158,7 @@ enum Kokkos_Tools_OptimizationType { Kokkos_Tools_Maximize }; -struct Kokkos_Tools_OptimzationGoal { +struct Kokkos_Tools_OptimizationGoal { size_t type_id; enum Kokkos_Tools_OptimizationType goal; }; @@ -218,7 +224,7 @@ typedef void (*Kokkos_Tools_contextBeginFunction)(const size_t); typedef void (*Kokkos_Tools_contextEndFunction)( const size_t, struct Kokkos_Tools_VariableValue); typedef void (*Kokkos_Tools_optimizationGoalDeclarationFunction)( - const size_t, const struct Kokkos_Tools_OptimzationGoal goal); + const size_t, const struct Kokkos_Tools_OptimizationGoal goal); struct Kokkos_Profiling_EventSet { Kokkos_Profiling_initFunction init; @@ -265,4 +271,8 @@ struct Kokkos_Profiling_EventSet { // changing struct layout }; +#ifdef __cplusplus +} +#endif + #endif // KOKKOS_PROFILING_C_INTERFACE_HPP diff --git a/profiling/all/impl/Kokkos_Profiling_Interface.hpp b/profiling/all/impl/Kokkos_Profiling_Interface.hpp index 37acc23b6..b66886d9f 100644 --- a/profiling/all/impl/Kokkos_Profiling_Interface.hpp +++ b/profiling/all/impl/Kokkos_Profiling_Interface.hpp @@ -19,6 +19,7 @@ #include #include +#include #include @@ -45,6 +46,7 @@ enum struct DeviceType { HPX, Threads, SYCL, + OpenACC, Unknown }; @@ -53,6 +55,12 @@ struct ExecutionSpaceIdentifier { uint32_t device_id; uint32_t instance_id; }; + +constexpr const uint32_t 
num_type_bits = 8; +constexpr const uint32_t num_device_bits = 7; +constexpr const uint32_t num_instance_bits = 17; +constexpr const uint32_t num_avail_bits = sizeof(uint32_t) * CHAR_BIT; + inline DeviceType devicetype_from_uint32t(const uint32_t in) { switch (in) { case 0: return DeviceType::Serial; @@ -63,37 +71,35 @@ inline DeviceType devicetype_from_uint32t(const uint32_t in) { case 5: return DeviceType::HPX; case 6: return DeviceType::Threads; case 7: return DeviceType::SYCL; + case 8: return DeviceType::OpenACC; default: return DeviceType::Unknown; // TODO: error out? } } inline ExecutionSpaceIdentifier identifier_from_devid(const uint32_t in) { - // ExecutionSpaceIdentifier out; - // out.type = in >> 24; - // out.device_id = in >> 17; - // out.instance_id = ((uint32_t(-1)) << 17 ) & in; - return {devicetype_from_uint32t(in >> 24), - (~((uint32_t(-1)) << 24)) & (in >> 17), - (~((uint32_t(-1)) << 17)) & in}; + constexpr const uint32_t shift = num_avail_bits - num_type_bits; + + return {devicetype_from_uint32t(in >> shift), /*First 8 bits*/ + (~((uint32_t(-1)) << num_device_bits)) & + (in >> num_instance_bits), /*Next 7 bits */ + (~((uint32_t(-1)) << num_instance_bits)) & in}; /*Last 17 bits*/ } template struct DeviceTypeTraits; -constexpr const size_t device_type_bits = 8; -constexpr const size_t instance_bits = 24; template constexpr uint32_t device_id_root() { - /** uncomment when C++14 is enabled constexpr auto device_id = static_cast(DeviceTypeTraits::id); - return (device_id << instance_bits); - */ - return 0; + return (device_id << (num_instance_bits + num_device_bits)); } template inline uint32_t device_id(ExecutionSpace const& space) noexcept { - return device_id_root() + space.impl_instance_id(); + return device_id_root() + + (DeviceTypeTraits::device_id(space) + << num_instance_bits) + + space.impl_instance_id(); } } // namespace Experimental } // namespace Tools @@ -220,7 +226,7 @@ using ValueType = Kokkos_Tools_VariableInfo_ValueType; using 
CandidateValueType = Kokkos_Tools_VariableInfo_CandidateValueType; using SetOrRange = Kokkos_Tools_VariableInfo_SetOrRange; using VariableInfo = Kokkos_Tools_VariableInfo; -using OptimizationGoal = Kokkos_Tools_OptimzationGoal; +using OptimizationGoal = Kokkos_Tools_OptimizationGoal; using TuningString = Kokkos_Tools_Tuning_String; using VariableValue = Kokkos_Tools_VariableValue; diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 67419b039..52bc9bad9 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -30,7 +30,6 @@ #ifndef WIN32 KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimer) -KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimerJSON) KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryEvents) KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryUsage) KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMark) @@ -52,6 +51,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector) KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector) #endif +#ifdef KOKKOSTOOLS_HAS_ROCTX +KOKKOSTOOLS_EXTERN_EVENT_SET(ROCTXConnector) +#endif #ifdef KOKKOSTOOLS_HAS_CALIPER namespace cali { extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set( @@ -66,10 +68,9 @@ namespace KokkosTools { EventSet get_event_set(const char* profiler, const char* config_str) { std::map handlers; #ifndef WIN32 - handlers["kernel-timer"] = KernelTimer::get_event_set(); - handlers["kernel-timer-json"] = KernelTimerJSON::get_event_set(); - handlers["memory-events"] = MemoryEvents::get_event_set(); - handlers["memory-usage"] = MemoryUsage::get_event_set(); + handlers["kernel-timer"] = KernelTimer::get_event_set(); + handlers["memory-events"] = MemoryEvents::get_event_set(); + handlers["memory-usage"] = MemoryUsage::get_event_set(); #if USE_MPI handlers["highwater-mark-mpi"] = HighwaterMarkMPI::get_event_set(); #endif @@ -93,6 +94,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) { #ifdef KOKKOSTOOLS_HAS_NVTX handlers["nvtx-connector"] = NVTXConnector::get_event_set(); 
handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_ROCTX + handlers["roctx-connector"] = ROCTXConnector::get_event_set(); #endif auto e = handlers.find(profiler); if (e != handlers.end()) return e->second; diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp index 5cb5ed391..cc51bc3d9 100644 --- a/profiling/all/kp_core.hpp +++ b/profiling/all/kp_core.hpp @@ -48,9 +48,17 @@ using Kokkos::Tools::SpaceHandle; #define EXPOSE_PROFILE_EVENT(FUNC_NAME) #define EXPOSE_BEGIN_FENCE(FUNC_NAME) #define EXPOSE_END_FENCE(FUNC_NAME) +#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) #else +#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_provide_tool_programming_interface( \ + const uint32_t num_actions, \ + Kokkos_Tools_ToolProgrammingInterface ptpi) { \ + FUNC_NAME(num_actions, ptpi); \ + } + #define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \ __attribute__((weak)) void kokkosp_request_tool_settings( \ const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \ diff --git a/profiling/papi-connector/CMakeLists.txt b/profiling/papi-connector/CMakeLists.txt index 478e996b1..961b4be95 100644 --- a/profiling/papi-connector/CMakeLists.txt +++ b/profiling/papi-connector/CMakeLists.txt @@ -1,3 +1,3 @@ -add_library(kp_papi_connector SHARED kp_papi_connector.cpp) +kp_add_library(kp_papi_connector kp_papi_connector.cpp) -target_link_libraries(kp_papi_connector PRIVATE PAPI::PAPI) \ No newline at end of file +target_link_libraries(kp_papi_connector PRIVATE PAPI::PAPI) diff --git a/profiling/roctx-connector/kp_roctx_connector.cpp b/profiling/roctx-connector/kp_roctx_connector.cpp index 593210aa3..6c1ea182e 100644 --- a/profiling/roctx-connector/kp_roctx_connector.cpp +++ b/profiling/roctx-connector/kp_roctx_connector.cpp @@ -21,6 +21,8 @@ #include #include +#include "kp_core.hpp" + namespace { struct Section { std::string label; @@ -29,20 +31,28 @@ struct 
Section { std::vector
kokkosp_sections; } // namespace -struct Kokkos_Tools_ToolSettings { - bool requires_global_fencing; - bool padding[255]; -}; +namespace KokkosTools { +namespace ROCTXConnector { + +static bool tool_globfences; -extern "C" void kokkosp_request_tool_settings( - const uint32_t, Kokkos_Tools_ToolSettings* settings) { - settings->requires_global_fencing = false; +void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings* settings) { + if (tool_globfences) { + settings->requires_global_fencing = true; + } else { + settings->requires_global_fencing = false; + } } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t /*devInfoCount*/, - void* /*deviceInfo*/) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t /*devInfoCount*/, + Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { + const char* tool_global_fences = std::getenv("KOKKOS_TOOLS_GLOBALFENCES"); + if (tool_global_fences) { + tool_globfences = (atoi(tool_global_fences) != 0); + } + std::cout << "-----------------------------------------------------------\n" << "KokkosP: ROC Tracer Connector (sequence is " << loadSeq << ", version: " << interfaceVer << ")\n" @@ -51,7 +61,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, roctxMark("Kokkos::Initialization Complete"); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { std::cout << R"( ----------------------------------------------------------- KokkosP: Finalization of ROC Tracer Connector. Complete. @@ -61,66 +71,108 @@ KokkosP: Finalization of ROC Tracer Connector. Complete. 
roctxMark("Kokkos::Finalization Complete"); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t /*devID*/, - uint64_t* /*kID*/) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, + uint64_t* /*kID*/) { roctxRangePush(name); } -extern "C" void kokkosp_end_parallel_for(const uint64_t /*kID*/) { - roctxRangePop(); -} +void kokkosp_end_parallel_for(const uint64_t /*kID*/) { roctxRangePop(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t /*devID*/, - uint64_t* /*kID*/) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, + uint64_t* /*kID*/) { roctxRangePush(name); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { - roctxRangePop(); -} +void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { roctxRangePop(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t /*devID*/, - uint64_t* /*kID*/) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, + uint64_t* /*kID*/) { roctxRangePush(name); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { - roctxRangePop(); -} +void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { roctxRangePop(); } -extern "C" void kokkosp_push_profile_region(char* name) { - roctxRangePush(name); -} +void kokkosp_push_profile_region(const char* name) { roctxRangePush(name); } -extern "C" void kokkosp_pop_profile_region() { roctxRangePop(); } +void kokkosp_pop_profile_region() { roctxRangePop(); } -extern "C" void kokkosp_create_profile_section(const char* name, - uint32_t* sID) { +void kokkosp_create_profile_section(const char* name, uint32_t* sID) { *sID = kokkosp_sections.size(); kokkosp_sections.push_back( {std::string(name), static_cast(-1)}); } -extern "C" void kokkosp_start_profile_section(const uint32_t sID) { +void kokkosp_start_profile_section(const uint32_t sID) { auto& section = kokkosp_sections[sID]; section.id = 
roctxRangeStart(section.label.c_str()); } -extern "C" void kokkosp_stop_profile_section(const uint32_t sID) { +void kokkosp_stop_profile_section(const uint32_t sID) { auto const& section = kokkosp_sections[sID]; roctxRangeStop(section.id); } -extern "C" void kokkosp_destroy_profile_section(const uint32_t sID) { +void kokkosp_destroy_profile_section(const uint32_t sID) { // do nothing } -extern "C" void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/, - uint64_t* fID) { +void kokkosp_profile_event(const char* name) { roctxMark(name); } + +void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/, + uint64_t* fID) { *fID = roctxRangeStart(name); } -extern "C" void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); } +void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.create_profile_section = kokkosp_create_profile_section; + my_event_set.start_profile_section = kokkosp_start_profile_section; + my_event_set.stop_profile_section = kokkosp_stop_profile_section; + my_event_set.destroy_profile_section = kokkosp_destroy_profile_section; + 
my_event_set.profile_event = kokkosp_profile_event; + my_event_set.begin_fence = kokkosp_begin_fence; + my_event_set.end_fence = kokkosp_end_fence; + return my_event_set; +} + +} // namespace ROCTXConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::ROCTXConnector; + +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section) +EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section) +EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section) +EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section) +EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event); +EXPOSE_BEGIN_FENCE(impl::kokkosp_begin_fence); +EXPOSE_END_FENCE(impl::kokkosp_end_fence); +} // extern "C" diff --git a/profiling/simple-kernel-timer/CMakeLists.txt b/profiling/simple-kernel-timer/CMakeLists.txt index e512a1d67..ebd05a6a8 100644 --- a/profiling/simple-kernel-timer/CMakeLists.txt +++ b/profiling/simple-kernel-timer/CMakeLists.txt @@ -7,10 +7,6 @@ if(NOT MSVC) set_property(TARGET kp_kernel_shared PROPERTY POSITION_INDEPENDENT_CODE ON) endif() -# Add JSON kernel-timer -kp_add_library(kp_kernel_timer_json kp_kernel_timer_json.cpp) -target_link_libraries(kp_kernel_timer_json PRIVATE kp_kernel_shared) - # Add binary kernel-timer kp_add_library(kp_kernel_timer kp_kernel_timer.cpp) target_link_libraries(kp_kernel_timer 
PRIVATE kp_kernel_shared) diff --git a/profiling/simple-kernel-timer/Makefile b/profiling/simple-kernel-timer/Makefile index 7c8bebd7d..14f4bcea0 100644 --- a/profiling/simple-kernel-timer/Makefile +++ b/profiling/simple-kernel-timer/Makefile @@ -1,12 +1,12 @@ CXX=g++ -CXXFLAGS=-O3 -std=c++11 -g +CXXFLAGS=-O3 -std=c++17 -g SHARED_CXXFLAGS=-shared -fPIC all: kp_kernel_timer.so kp_reader kp_json_writer MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all -I${MAKEFILE_PATH}../../common kp_reader: ${MAKEFILE_PATH}kp_reader.cpp kp_kernel_timer.so $(CXX) $(CXXFLAGS) -o kp_reader ${MAKEFILE_PATH}kp_reader.cpp ${MAKEFILE_PATH}kp_shared.cpp diff --git a/profiling/simple-kernel-timer/kp_json_writer.cpp b/profiling/simple-kernel-timer/kp_json_writer.cpp index a1d62fc27..3d58b260a 100644 --- a/profiling/simple-kernel-timer/kp_json_writer.cpp +++ b/profiling/simple-kernel-timer/kp_json_writer.cpp @@ -14,7 +14,6 @@ // //@HEADER -// clang-format off #include #include #include @@ -28,7 +27,6 @@ using namespace KokkosTools::KernelTimer; -// clang-format on bool is_region(KernelPerformanceInfo const& kp) { return kp.getKernelType() == REGION; } @@ -55,77 +53,61 @@ inline void write_json(std::ostream& os, KernelPerformanceInfo const& kp, << '\n'; os << indent << '}'; } -// clang-format off - -int find_index(std::vector& kernels, - const char* kernelName) { - - for(unsigned int i = 0; i < kernels.size(); i++) { - KernelPerformanceInfo* nextKernel = kernels[i]; - - if(strcmp(nextKernel->getName(), kernelName) == 0) { - return i; - } - } - - return -1; -} int main(int argc, char* argv[]) { + if (argc == 1) { + fprintf(stderr, "Did you specify any data files on the command line!\n"); + fprintf(stderr, "Usage: ./kp_json_writer file1.dat [fileX.dat]*\n"); + exit(-1); + 
} - if(argc == 1) { - fprintf(stderr, "Did you specify any data files on the command line!\n"); - fprintf(stderr, "Usage: ./kp_json_writer file1.dat [fileX.dat]*\n"); - exit(-1); - } - - int commandline_args = 1; - while( (commandline_args kernelInfo; - double totalKernelsTime = 0; - double totalExecuteTime = 0; - uint64_t totalKernelsCalls = 0; + std::vector kernelInfo; + double totalKernelsTime = 0; + double totalExecuteTime = 0; + uint64_t totalKernelsCalls = 0; - for(int i = commandline_args; i < argc; i++) { - FILE* the_file = fopen(argv[i], "rb"); + for (int i = commandline_args; i < argc; i++) { + FILE* the_file = fopen(argv[i], "rb"); - double fileExecuteTime = 0; - fread(&fileExecuteTime, sizeof(fileExecuteTime), 1, the_file); + double fileExecuteTime = 0; + fread(&fileExecuteTime, sizeof(fileExecuteTime), 1, the_file); - totalExecuteTime += fileExecuteTime; + totalExecuteTime += fileExecuteTime; - while(! feof(the_file)) { - KernelPerformanceInfo* new_kernel = new KernelPerformanceInfo("", PARALLEL_FOR); - if(new_kernel->readFromFile(the_file)) { - if(strlen(new_kernel->getName()) > 0) { - int kernelIndex = find_index(kernelInfo, new_kernel->getName()); + while (!feof(the_file)) { + KernelPerformanceInfo* new_kernel = + new KernelPerformanceInfo("", PARALLEL_FOR); + if (new_kernel->readFromFile(the_file)) { + if (!new_kernel->getName().empty()) { + int kernelIndex = find_index(kernelInfo, new_kernel->getName()); - if(kernelIndex > -1) { - kernelInfo[kernelIndex]->addTime(new_kernel->getTime()); - kernelInfo[kernelIndex]->addCallCount(new_kernel->getCallCount()); - } else { - kernelInfo.push_back(new_kernel); - } - } - } - } + if (kernelIndex > -1) { + kernelInfo[kernelIndex]->addTime(new_kernel->getTime()); + kernelInfo[kernelIndex]->addCallCount(new_kernel->getCallCount()); + } else { + kernelInfo.push_back(new_kernel); + } + } + } + } - fclose(the_file); - } + fclose(the_file); + } - std::sort(kernelInfo.begin(), kernelInfo.end(), 
compareKernelPerformanceInfo); + std::sort(kernelInfo.begin(), kernelInfo.end(), compareKernelPerformanceInfo); - for(unsigned int i = 0; i < kernelInfo.size(); i++) { - if(kernelInfo[i]->getKernelType() != REGION) { - totalKernelsTime += kernelInfo[i]->getTime(); - totalKernelsCalls += kernelInfo[i]->getCallCount(); + for (unsigned int i = 0; i < kernelInfo.size(); i++) { + if (kernelInfo[i]->getKernelType() != REGION) { + totalKernelsTime += kernelInfo[i]->getTime(); + totalKernelsCalls += kernelInfo[i]->getCallCount(); } - } + } - // clang-format on // std::string filename = "test.json"; // std::ofstream fout(filename); auto& fout = std::cout; @@ -167,8 +149,6 @@ int main(int argc, char* argv[]) { fout << " ]\n"; fout << "}\n"; - // clang-format off - - return 0; + return 0; } diff --git a/profiling/simple-kernel-timer/kp_kernel_info.h b/profiling/simple-kernel-timer/kp_kernel_info.h index 93b7871eb..47f08d15f 100644 --- a/profiling/simple-kernel-timer/kp_kernel_info.h +++ b/profiling/simple-kernel-timer/kp_kernel_info.h @@ -22,29 +22,10 @@ #include #include -#if defined(__GXX_ABI_VERSION) -#define HAVE_GCC_ABI_DEMANGLE -#endif - -#if defined(HAVE_GCC_ABI_DEMANGLE) -#include -#endif // HAVE_GCC_ABI_DEMANGLE +#include "utils/demangle.hpp" namespace KokkosTools::KernelTimer { -inline char* demangleName(char* kernelName) { -#if defined(HAVE_GCC_ABI_DEMANGLE) - int status = -1; - char* demangledKernelName = - abi::__cxa_demangle(kernelName, NULL, NULL, &status); - if (status == 0) { - free(kernelName); - kernelName = demangledKernelName; - } -#endif // HAVE_GCC_ABI_DEMANGLE - return kernelName; -} - inline double seconds() { struct timeval now; gettimeofday(&now, NULL); @@ -62,15 +43,7 @@ enum KernelExecutionType { class KernelPerformanceInfo { public: KernelPerformanceInfo(std::string kName, KernelExecutionType kernelType) - : kType(kernelType) { - kernelName = (char*)malloc(sizeof(char) * (kName.size() + 1)); - strcpy(kernelName, kName.c_str()); - - callCount = 0; 
- time = 0; - } - - ~KernelPerformanceInfo() { free(kernelName); } + : kernelName(std::move(kName)), kType(kernelType) {} KernelExecutionType getKernelType() const { return kType; } @@ -95,7 +68,7 @@ class KernelPerformanceInfo { double getTimeSq() { return timeSq; } - char* getName() const { return kernelName; } + const std::string& getName() const { return kernelName; } void addCallCount(const uint64_t newCalls) { callCount += newCalls; } @@ -112,15 +85,9 @@ class KernelPerformanceInfo { copy((char*)&kernelNameLength, &entry[nextIndex], sizeof(kernelNameLength)); nextIndex += sizeof(kernelNameLength); - if (strlen(kernelName) > 0) { - free(kernelName); - } - - kernelName = (char*)malloc(sizeof(char) * (kernelNameLength + 1)); - copy(kernelName, &entry[nextIndex], kernelNameLength); - kernelName[kernelNameLength] = '\0'; + this->kernelName = std::string(&entry[nextIndex], kernelNameLength); - kernelName = demangleName(kernelName); + kernelName = demangleNameKokkos(kernelName); nextIndex += kernelNameLength; @@ -152,7 +119,7 @@ class KernelPerformanceInfo { } void writeToBinaryFile(FILE* output) { - const uint32_t kernelNameLen = (uint32_t)strlen(kernelName); + const uint32_t kernelNameLen = kernelName.size(); const uint32_t recordLen = sizeof(uint32_t) + sizeof(char) * kernelNameLen + sizeof(uint64_t) + sizeof(double) + sizeof(double) + sizeof(uint32_t); @@ -163,7 +130,7 @@ class KernelPerformanceInfo { copy(&entry[nextIndex], (char*)&kernelNameLen, sizeof(kernelNameLen)); nextIndex += sizeof(kernelNameLen); - copy(&entry[nextIndex], kernelName, kernelNameLen); + copy(&entry[nextIndex], kernelName.c_str(), kernelNameLen); nextIndex += kernelNameLen; copy(&entry[nextIndex], (char*)&callCount, sizeof(callCount)); @@ -191,7 +158,7 @@ class KernelPerformanceInfo { snprintf(indentBuffer, 256, "%s ", indent); fprintf(output, "%s\"kernel-name\" : \"%s\",\n", indentBuffer, - kernelName); + kernelName.c_str()); // fprintf(output, "%s\"region\" : \"%s\",\n", indentBuffer, 
// regionName); fprintf(output, "%s\"call-count\" : %llu,\n", indentBuffer, @@ -216,12 +183,12 @@ class KernelPerformanceInfo { } } - char* kernelName; + std::string kernelName; // const char* regionName; - uint64_t callCount; - double time; - double timeSq; - double startTime; + uint64_t callCount = 0; + double time = 0; + double timeSq = 0; + double startTime = 0; KernelExecutionType kType; }; diff --git a/profiling/simple-kernel-timer/kp_kernel_timer.cpp b/profiling/simple-kernel-timer/kp_kernel_timer.cpp index 33187c9c4..a015f5589 100644 --- a/profiling/simple-kernel-timer/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer.cpp @@ -15,6 +15,7 @@ //@HEADER #include +#include #include #include #include @@ -25,6 +26,10 @@ namespace KokkosTools { namespace KernelTimer { +bool is_region(KernelPerformanceInfo const& kp) { + return kp.getKernelType() == REGION; +} + void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t /*devInfoCount*/, Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { @@ -52,23 +57,90 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, void kokkosp_finalize_library() { double finishTime = seconds(); + const char* kokkos_tools_timer_json_raw = getenv("KOKKOS_TOOLS_TIMER_JSON"); + const bool kokkos_tools_timer_json = + kokkos_tools_timer_json_raw == NULL + ? false + : strcmp(kokkos_tools_timer_json_raw, "1") == 0 || + strcmp(kokkos_tools_timer_json_raw, "true") == 0 || + strcmp(kokkos_tools_timer_json_raw, "True") == 0; + + double kernelTimes = 0; + char* hostname = (char*)malloc(sizeof(char) * 256); gethostname(hostname, 256); char* fileOutput = (char*)malloc(sizeof(char) * 256); - snprintf(fileOutput, 256, "%s-%d.dat", hostname, (int)getpid()); + snprintf(fileOutput, 256, "%s-%d.%s", hostname, (int)getpid(), + kokkos_tools_timer_json ? 
"json" : "dat"); free(hostname); FILE* output_data = fopen(fileOutput, "wb"); const double totalExecuteTime = (finishTime - initTime); - fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data); + if (!kokkos_tools_timer_json) { + fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data); - std::vector kernelList; + for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); + kernel_itr++) { + kernel_itr->second->writeToBinaryFile(output_data); + } + } else { + std::vector kernelList; + + for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); + kernel_itr++) { + kernelList.push_back(kernel_itr->second); + kernelTimes += kernel_itr->second->getTime(); + } + + std::sort(kernelList.begin(), kernelList.end(), + compareKernelPerformanceInfo); + + fprintf(output_data, "{\n\"kokkos-kernel-data\" : {\n"); + fprintf(output_data, " \"total-app-time\" : %10.3f,\n", + totalExecuteTime); + fprintf(output_data, " \"total-kernel-times\" : %10.3f,\n", + kernelTimes); + fprintf(output_data, " \"total-non-kernel-times\" : %10.3f,\n", + (totalExecuteTime - kernelTimes)); + + const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0; + fprintf(output_data, " \"percent-in-kernels\" : %6.2f,\n", + percentKokkos); + fprintf(output_data, " \"unique-kernel-calls\" : %22llu,\n", + (unsigned long long)count_map.size()); + fprintf(output_data, "\n"); + + fprintf(output_data, " \"region-perf-info\" : [\n"); + +#define KERNEL_INFO_INDENT " " + + bool print_comma = false; + for (auto const& kernel : count_map) { + if (!is_region(*std::get<1>(kernel))) continue; + if (print_comma) fprintf(output_data, ",\n"); + kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); + print_comma = true; + } + + fprintf(output_data, "\n"); + fprintf(output_data, " ],\n"); + + fprintf(output_data, " \"kernel-perf-info\" : [\n"); + + print_comma = false; + for (auto const& kernel : count_map) { + if (is_region(*std::get<1>(kernel))) continue; 
+ if (print_comma) fprintf(output_data, ",\n"); + kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); + print_comma = true; + } + + fprintf(output_data, "\n"); + fprintf(output_data, " ]\n"); - for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); - kernel_itr++) { - kernel_itr->second->writeToBinaryFile(output_data); + fprintf(output_data, "}\n}"); } fclose(output_data); diff --git a/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp b/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp deleted file mode 100644 index 859fa3d7b..000000000 --- a/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp +++ /dev/null @@ -1,191 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include -#include -#include - -#include "kp_core.hpp" -#include "kp_shared.h" - -using namespace KokkosTools::KernelTimer; - -namespace KokkosTools { -namespace KernelTimerJSON { - -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t /*devInfoCount*/, - Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { - const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); - if (NULL == output_delim_env) { - outputDelimiter = (char*)malloc(sizeof(char) * 2); - snprintf(outputDelimiter, 2, "%c", ' '); - } else { - outputDelimiter = - (char*)malloc(sizeof(char) * (strlen(output_delim_env) + 1)); - strcpy(outputDelimiter, output_delim_env); - } - - printf( - "KokkosP: LDMS JSON Connector Initialized (sequence is %d, version: " - "%llu)\n", - loadSeq, (long long unsigned int)interfaceVer); - - initTime = seconds(); -} - -void kokkosp_finalize_library() { - double finishTime = seconds(); - double kernelTimes = 0; - - char* mpi_rank = getenv("OMPI_COMM_WORLD_RANK"); - - char* hostname = (char*)malloc(sizeof(char) * 256); - gethostname(hostname, 256); - - char* fileOutput = (char*)malloc(sizeof(char) * 256); - snprintf(fileOutput, 256, "%s-%d-%s.json", hostname, (int)getpid(), - (NULL == mpi_rank) ? "0" : mpi_rank); - - free(hostname); - FILE* output_data = fopen(fileOutput, "w"); - - const double totalExecuteTime = (finishTime - initTime); - std::vector kernelList; - - for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); - kernel_itr++) { - kernelList.push_back(kernel_itr->second); - kernelTimes += kernel_itr->second->getTime(); - } - - std::sort(kernelList.begin(), kernelList.end(), compareKernelPerformanceInfo); - - fprintf(output_data, "{\n\"kokkos-kernel-data\" : {\n"); - fprintf(output_data, " \"mpi-rank\" : %s,\n", - (NULL == mpi_rank) ? 
"0" : mpi_rank); - fprintf(output_data, " \"total-app-time\" : %10.3f,\n", - totalExecuteTime); - fprintf(output_data, " \"total-kernel-times\" : %10.3f,\n", - kernelTimes); - fprintf(output_data, " \"total-non-kernel-times\" : %10.3f,\n", - (totalExecuteTime - kernelTimes)); - - const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0; - fprintf(output_data, " \"percent-in-kernels\" : %6.2f,\n", - percentKokkos); - fprintf(output_data, " \"unique-kernel-calls\" : %22llu,\n", - (unsigned long long)count_map.size()); - fprintf(output_data, "\n"); - - fprintf(output_data, " \"kernel-perf-info\" : [\n"); - -#define KERNEL_INFO_INDENT " " - - bool print_comma = false; - for (auto const& kernel : count_map) { - if (print_comma) fprintf(output_data, ",\n"); - kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); - print_comma = true; - } - - fprintf(output_data, "\n"); - fprintf(output_data, " ]\n"); - fprintf(output_data, "}\n}"); - fclose(output_data); -} - -void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, - uint64_t* kID) { - *kID = uniqID++; - - if ((NULL == name) || (strcmp("", name) == 0)) { - fprintf(stderr, "Error: kernel is empty\n"); - exit(-1); - } - - increment_counter(name, PARALLEL_FOR); -} - -void kokkosp_end_parallel_for(const uint64_t /*kID*/) { - currentEntry->addFromTimer(); -} - -void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, - uint64_t* kID) { - *kID = uniqID++; - - if ((NULL == name) || (strcmp("", name) == 0)) { - fprintf(stderr, "Error: kernel is empty\n"); - exit(-1); - } - - increment_counter(name, PARALLEL_SCAN); -} - -void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { - currentEntry->addFromTimer(); -} - -void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, - uint64_t* kID) { - *kID = uniqID++; - - if ((NULL == name) || (strcmp("", name) == 0)) { - fprintf(stderr, "Error: kernel is empty\n"); - exit(-1); - } - - 
increment_counter(name, PARALLEL_REDUCE); -} - -void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { - currentEntry->addFromTimer(); -} - -Kokkos::Tools::Experimental::EventSet get_event_set() { - Kokkos::Tools::Experimental::EventSet my_event_set; - memset(&my_event_set, 0, - sizeof(my_event_set)); // zero any pointers not set here - my_event_set.init = kokkosp_init_library; - my_event_set.finalize = kokkosp_finalize_library; - my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; - my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; - my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; - my_event_set.end_parallel_for = kokkosp_end_parallel_for; - my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; - my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; - return my_event_set; -} - -} // namespace KernelTimerJSON -} // namespace KokkosTools - -extern "C" { - -namespace impl = KokkosTools::KernelTimerJSON; - -EXPOSE_INIT(impl::kokkosp_init_library) -EXPOSE_FINALIZE(impl::kokkosp_finalize_library) -EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) -EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) -EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) -EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) -EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) -EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) - -} // extern "C" diff --git a/profiling/simple-kernel-timer/kp_reader.cpp b/profiling/simple-kernel-timer/kp_reader.cpp index 094c0a8f1..7225d1c00 100644 --- a/profiling/simple-kernel-timer/kp_reader.cpp +++ b/profiling/simple-kernel-timer/kp_reader.cpp @@ -25,19 +25,6 @@ using namespace KokkosTools::KernelTimer; -int find_index(std::vector& kernels, - const char* kernelName) { - for (unsigned int i = 0; i < kernels.size(); i++) { - KernelPerformanceInfo* nextKernel = kernels[i]; - - if (strcmp(nextKernel->getName(), kernelName) == 0) { - return i; - 
} - } - - return -1; -} - int main(int argc, char* argv[]) { if (argc == 1) { fprintf(stderr, "Did you specify any data files on the command line!\n"); @@ -77,7 +64,7 @@ int main(int argc, char* argv[]) { KernelPerformanceInfo* new_kernel = new KernelPerformanceInfo("", PARALLEL_FOR); if (new_kernel->readFromFile(the_file)) { - if (strlen(new_kernel->getName()) > 0) { + if (!new_kernel->getName().empty()) { int kernelIndex = find_index(kernelInfo, new_kernel->getName()); if (kernelIndex > -1) { @@ -102,6 +89,13 @@ int main(int argc, char* argv[]) { } } + printf( + " (Type) Total Time, Call Count, Avg. Time per Call, %%Total Time in " + "Kernels, %%Total Program Time\n"); + printf( + "------------------------------------------------------------------------" + "-\n\n"); + printf("Regions: \n\n"); for (unsigned int i = 0; i < kernelInfo.size(); i++) { @@ -110,7 +104,7 @@ int main(int argc, char* argv[]) { if (kernelInfo[i]->getKernelType() != REGION) continue; if (fixed_width) printf("- %100s\n%11s%c%15.5f%c%12" PRIu64 "%c%15.5f%c%7.3f%c%7.3f\n", - kernelInfo[i]->getName(), + kernelInfo[i]->getName().c_str(), (kernelInfo[i]->getKernelType() == PARALLEL_FOR) ? (" (ParFor) ") : ((kernelInfo[i]->getKernelType() == PARALLEL_REDUCE) @@ -125,7 +119,7 @@ int main(int argc, char* argv[]) { (kernelInfo[i]->getTime() / totalExecuteTime) * 100.0); else printf("- %s\n%s%c%f%c%" PRIu64 "%c%f%c%f%c%f\n", - kernelInfo[i]->getName(), + kernelInfo[i]->getName().c_str(), (kernelInfo[i]->getKernelType() == PARALLEL_FOR) ? (" (ParFor) ") : ((kernelInfo[i]->getKernelType() == PARALLEL_REDUCE) @@ -152,7 +146,7 @@ int main(int argc, char* argv[]) { if (kernelInfo[i]->getKernelType() == REGION) continue; if (fixed_width) printf("- %100s\n%11s%c%15.5f%c%12" PRIu64 "%c%15.5f%c%7.3f%c%7.3f\n", - kernelInfo[i]->getName(), + kernelInfo[i]->getName().c_str(), (kernelInfo[i]->getKernelType() == PARALLEL_FOR) ? 
(" (ParFor) ") : ((kernelInfo[i]->getKernelType() == PARALLEL_REDUCE) @@ -167,7 +161,7 @@ int main(int argc, char* argv[]) { (kernelInfo[i]->getTime() / totalExecuteTime) * 100.0); else printf("- %s\n%s%c%f%c%" PRIu64 "%c%f%c%f%c%f\n", - kernelInfo[i]->getName(), + kernelInfo[i]->getName().c_str(), (kernelInfo[i]->getKernelType() == PARALLEL_FOR) ? (" (ParFor) ") : ((kernelInfo[i]->getKernelType() == PARALLEL_REDUCE) diff --git a/profiling/simple-kernel-timer/kp_shared.h b/profiling/simple-kernel-timer/kp_shared.h index f719f5b20..29540ef98 100644 --- a/profiling/simple-kernel-timer/kp_shared.h +++ b/profiling/simple-kernel-timer/kp_shared.h @@ -19,6 +19,8 @@ #include #include +#include + #include "kp_kernel_info.h" namespace KokkosTools::KernelTimer { @@ -39,6 +41,16 @@ inline bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, return left->getTime() > right->getTime(); }; +inline int find_index(const std::vector& kernels, + const std::string& kernelName) { + for (unsigned int i = 0; i < kernels.size(); ++i) { + if (kernels[i]->getName() == kernelName) { + return i; + } + } + return -1; +} + } // namespace KokkosTools::KernelTimer #endif // _H_KOKKOSP_KERNEL_SHARED diff --git a/profiling/space-time-stack/Makefile b/profiling/space-time-stack/Makefile index 6ed5971ea..0a71839f2 100644 --- a/profiling/space-time-stack/Makefile +++ b/profiling/space-time-stack/Makefile @@ -1,12 +1,12 @@ CXX=mpicxx -CXXFLAGS=-shared -O3 -g -fPIC -std=c++11 -Wall -Wextra +CXXFLAGS=-shared -O3 -g -fPIC -std=c++17 -Wall -Wextra #Turn MPI support off: #CXXFLAGS += -DUSE_MPI=0 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all -I${MAKEFILE_PATH}../../common kp_space_time_stack.so: ${MAKEFILE_PATH}kp_space_time_stack.cpp $(CXX) $(CXXFLAGS) -o $@ $< diff 
--git a/profiling/space-time-stack/kp_space_time_stack.cpp b/profiling/space-time-stack/kp_space_time_stack.cpp index 292021787..a5cbfed8e 100644 --- a/profiling/space-time-stack/kp_space_time_stack.cpp +++ b/profiling/space-time-stack/kp_space_time_stack.cpp @@ -31,6 +31,8 @@ #include #include +#include "utils/demangle.hpp" + #include "kp_core.hpp" #if USE_MPI @@ -519,12 +521,20 @@ struct Allocations { Allocations() : total_size(0) {} void allocate(std::string&& name, const void* ptr, std::uint64_t size, StackNode* frame) { + if (ptr == nullptr) { + assert(size == 0); + return; + } auto res = alloc_set.emplace(Allocation(std::move(name), ptr, size, frame)); assert(res.second); total_size += size; } void deallocate(std::string&& name, const void* ptr, std::uint64_t size, StackNode* frame) { + if (ptr == nullptr) { + assert(size == 0); + return; + } auto key = Allocation(std::move(name), ptr, size, frame); auto it = alloc_set.find(key); if (it == alloc_set.end()) { @@ -741,7 +751,7 @@ struct State { } void begin_frame(const char* name, StackKind kind) { - std::string name_str(name); + std::string name_str(demangleNameKokkos(name)); stack_frame = stack_frame->get_child(std::move(name_str), kind); stack_frame->begin(); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 000000000..4ca3a79b6 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,116 @@ +# Add an executable and its related test. +# +# The executable is always linked to 'kokkostools' and 'test_common'. 
+# +# Arguments: +# TARGET_NAME : name of the test (required) +# SOURCE_FILE : source file, defaults to ${TARGET_NAME}.cpp (optional) +# KOKKOS_TOOLS_LIBS : the test environment receives the variable 'KOKKOS_TOOLS_LIBS', set to the semicolon-separated paths +# of the target files of the tool libraries listed in this argument (optional) +# KOKKOS_TOOLS_SAMPLER_VERBOSE : the test environment receives the variable 'KOKKOS_TOOLS_SAMPLER_VERBOSE', set to the given value, so that the sampler prints a message when a sample has been taken (optional) +# KOKKOS_TOOLS_GLOBALFENCES : the test environment receives the variable 'KOKKOS_TOOLS_GLOBALFENCES', set to the given value; a value of 1 turns the tool's auto-fencing on (optional) +# KOKKOS_TOOLS_RANDOM_SEED : the test environment receives the variable 'KOKKOS_TOOLS_RANDOM_SEED', set to the given seed for the random number generator (used for testing repeatability) (optional) +# KOKKOS_TOOLS_SAMPLER_SKIP : the test environment receives the variable 'KOKKOS_TOOLS_SAMPLER_SKIP', set to the given number of Kokkos kernel invocations to skip before a tooling activity is invoked (optional) +# KOKKOS_TOOLS_SAMPLER_PROB : the test environment receives the variable 'KOKKOS_TOOLS_SAMPLER_PROB', set to the given probability (in percent) that a Kokkos kernel invocation has a tooling activity invoked for it (optional) 
+ + +function(kp_add_executable_and_test) + cmake_parse_arguments(kaeat_args "" "TARGET_NAME;SOURCE_FILE;KOKKOS_TOOLS_SAMPLER_VERBOSE;KOKKOS_TOOLS_GLOBALFENCES;KOKKOS_TOOLS_SAMPLER_SKIP;KOKKOS_TOOLS_SAMPLER_PROB;KOKKOS_TOOLS_RANDOM_SEED" "KOKKOS_TOOLS_LIBS" ${ARGN}) + if(NOT DEFINED kaeat_args_TARGET_NAME) + message(FATAL_ERROR "'TARGET_NAME' is a required argument.") + endif() + + if(NOT DEFINED kaeat_args_SOURCE_FILE) + set(kaeat_args_SOURCE_FILE "${kaeat_args_TARGET_NAME}.cpp") + endif() + + add_executable(${kaeat_args_TARGET_NAME}) + + target_sources( + ${kaeat_args_TARGET_NAME} + PRIVATE + ${kaeat_args_SOURCE_FILE} + ) + target_link_libraries( + ${kaeat_args_TARGET_NAME} + PRIVATE + test_common + ) + + add_test( + NAME ${kaeat_args_TARGET_NAME} + COMMAND $ + ) + + if(DEFINED kaeat_args_KOKKOS_TOOLS_LIBS) + set(TOOL_LIBS_FILES) + foreach(TOOL_LIB ${kaeat_args_KOKKOS_TOOLS_LIBS}) + list(APPEND TOOL_LIBS_FILES "$") + endforeach() + string(REPLACE ";" "\;" TOOL_LIBS_FILES "${TOOL_LIBS_FILES}") + + set_property( + TEST ${kaeat_args_TARGET_NAME} + APPEND PROPERTY ENVIRONMENT "KOKKOS_TOOLS_LIBS=${TOOL_LIBS_FILES}" + ) + endif() + + if(DEFINED kaeat_args_KOKKOS_TOOLS_SAMPLER_VERBOSE) + set_property( + TEST ${kaeat_args_TARGET_NAME} + APPEND + PROPERTY + ENVIRONMENT "KOKKOS_TOOLS_SAMPLER_VERBOSE=${kaeat_args_KOKKOS_TOOLS_SAMPLER_VERBOSE}" + ) + endif() + + if(DEFINED kaeat_args_KOKKOS_TOOLS_GLOBALFENCES) + set_property( + TEST ${kaeat_args_TARGET_NAME} + APPEND + PROPERTY + ENVIRONMENT "KOKKOS_TOOLS_GLOBALFENCES=${kaeat_args_KOKKOS_TOOLS_GLOBALFENCES}" + ) + endif() + + if (DEFINED kaeat_args_KOKKOS_TOOLS_SAMPLER_SKIP) + set_property( + TEST ${kaeat_args_TARGET_NAME} + APPEND + PROPERTY + ENVIRONMENT "KOKKOS_TOOLS_SAMPLER_SKIP=${kaeat_args_KOKKOS_TOOLS_SAMPLER_SKIP}" + ) + endif() + + if (DEFINED kaeat_args_KOKKOS_TOOLS_SAMPLER_PROB) + set_property( + TEST ${kaeat_args_TARGET_NAME} + APPEND + PROPERTY + ENVIRONMENT 
"KOKKOS_TOOLS_SAMPLER_PROB=${kaeat_args_KOKKOS_TOOLS_SAMPLER_PROB}" + ) + endif() + + if (DEFINED kaeat_args_KOKKOS_TOOLS_RANDOM_SEED) + set_property( + TEST ${kaeat_args_TARGET_NAME} + APPEND + PROPERTY + ENVIRONMENT "KOKKOS_TOOLS_RANDOM_SEED=${kaeat_args_KOKKOS_TOOLS_RANDOM_SEED}" + ) + endif() + +endfunction(kp_add_executable_and_test) + +# Create a test library that contains the required Kokkos and Google Test +# initialization sequence. +add_library(test_common OBJECT) +target_sources( + test_common + PRIVATE + UnitTestMain.cpp +) +target_link_libraries(test_common PUBLIC GTest::gtest GTest::gmock Kokkos::kokkos) + +add_subdirectory(space-time-stack) +add_subdirectory(sampler) diff --git a/tests/UnitTestMain.cpp b/tests/UnitTestMain.cpp new file mode 100644 index 000000000..066d22adc --- /dev/null +++ b/tests/UnitTestMain.cpp @@ -0,0 +1,12 @@ +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +//! Main entry point for tests. +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + + auto success = RUN_ALL_TESTS(); + + return success; +} diff --git a/tests/sampler/CMakeLists.txt b/tests/sampler/CMakeLists.txt new file mode 100644 index 000000000..a6c86f404 --- /dev/null +++ b/tests/sampler/CMakeLists.txt @@ -0,0 +1,86 @@ +## tests for skip rate 5, no fencing selected + +kp_add_executable_and_test( + TARGET_NAME test_sampling_parfor + SOURCE_FILE test_parfor.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_SAMPLER_SKIP 5 +) + +kp_add_executable_and_test( + TARGET_NAME test_sampling_parscan + SOURCE_FILE test_parscan.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_SAMPLER_SKIP 5 +) + +kp_add_executable_and_test( + TARGET_NAME test_sampling_parreduce + SOURCE_FILE test_parreduce.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_SAMPLER_SKIP 5 +) + +## tests for 
probability of 51.6% (with skip rate 0), no fencing selected + +kp_add_executable_and_test( + TARGET_NAME test_sampling_prob_parfor + SOURCE_FILE test_parfor_prob.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_RANDOM_SEED 2 + KOKKOS_TOOLS_SAMPLER_PROB 51.6 +) + +kp_add_executable_and_test( + TARGET_NAME test_sampling_prob_parscan + SOURCE_FILE test_parscan_prob.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_RANDOM_SEED 2 + KOKKOS_TOOLS_SAMPLER_PROB 51.6 +) + +kp_add_executable_and_test( + TARGET_NAME test_sampling_prob_parreduce + SOURCE_FILE test_parreduce_prob.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_RANDOM_SEED 2 + KOKKOS_TOOLS_SAMPLER_PROB 51.6 +) + +## tests for probability of 51.6% (with skip rate 0), with fences + +kp_add_executable_and_test( + TARGET_NAME test_sampling_prob_parfor_fence + SOURCE_FILE test_parfor_prob.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_RANDOM_SEED 2 + KOKKOS_TOOLS_SAMPLER_PROB 51.6 + KOKKOS_TOOLS_GLOBALFENCES 1 +) + +kp_add_executable_and_test( + TARGET_NAME test_sampling_prob_parscan_fence + SOURCE_FILE test_parscan_prob.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_RANDOM_SEED 2 + KOKKOS_TOOLS_SAMPLER_PROB 51.6 + KOKKOS_TOOLS_GLOBALFENCES 1 +) + +kp_add_executable_and_test( + TARGET_NAME test_sampling_prob_parreduce_fence + SOURCE_FILE test_parreduce_prob.cpp + KOKKOS_TOOLS_LIBS kp_kokkos_sampler kp_kernel_logger + KOKKOS_TOOLS_SAMPLER_VERBOSE 2 + KOKKOS_TOOLS_RANDOM_SEED 2 + KOKKOS_TOOLS_SAMPLER_PROB 51.6 + KOKKOS_TOOLS_GLOBALFENCES 1 +) diff --git a/tests/sampler/matchersProb.hpp b/tests/sampler/matchersProb.hpp new file mode 100644 index 000000000..a6fb98f94 --- /dev/null +++ b/tests/sampler/matchersProb.hpp @@ -0,0 +1,42 
@@ +#pragma once + +static const std::vector matchers{ + "KokkosP: sample 3 calling child-begin function...", + "KokkosP: sample 3 finished with child-begin function.", + "KokkosP: sample 3 calling child-end function...", + "KokkosP: sample 3 finished with child-end function.", + + "KokkosP: sample 4 calling child-begin function...", + "KokkosP: sample 4 finished with child-begin function.", + "KokkosP: sample 4 calling child-end function...", + "KokkosP: sample 4 finished with child-end function.", + + "KokkosP: sample 5 calling child-begin function...", + "KokkosP: sample 5 finished with child-begin function.", + "KokkosP: sample 5 calling child-end function...", + "KokkosP: sample 5 finished with child-end function.", + + "KokkosP: sample 6 calling child-begin function...", + "KokkosP: sample 6 finished with child-begin function.", + "KokkosP: sample 6 calling child-end function...", + "KokkosP: sample 6 finished with child-end function.", + + "KokkosP: sample 8 calling child-begin function...", + "KokkosP: sample 8 finished with child-begin function.", + "KokkosP: sample 8 calling child-end function...", + "KokkosP: sample 8 finished with child-end function.", + + "KokkosP: sample 12 calling child-begin function...", + "KokkosP: sample 12 finished with child-begin function.", + "KokkosP: sample 12 calling child-end function...", + "KokkosP: sample 12 finished with child-end function.", + + "KokkosP: sample 13 calling child-begin function...", + "KokkosP: sample 13 finished with child-begin function.", + "KokkosP: sample 13 calling child-end function...", + "KokkosP: sample 13 finished with child-end function.", + + "KokkosP: sample 14 calling child-begin function...", + "KokkosP: sample 14 finished with child-begin function.", + "KokkosP: sample 14 calling child-end function...", + "KokkosP: sample 14 finished with child-end function."}; diff --git a/tests/sampler/matchersSkip.hpp b/tests/sampler/matchersSkip.hpp new file mode 100644 index 000000000..8a3f6887d 
--- /dev/null +++ b/tests/sampler/matchersSkip.hpp @@ -0,0 +1,11 @@ +#pragma once + +static const std::vector matchers{ + "KokkosP: sample 6 calling child-begin function...", + "KokkosP: sample 6 finished with child-begin function.", + "KokkosP: sample 6 calling child-end function...", + "KokkosP: sample 6 finished with child-end function.", + "KokkosP: sample 12 calling child-begin function...", + "KokkosP: sample 12 finished with child-begin function.", + "KokkosP: sample 12 calling child-end function...", + "KokkosP: sample 12 finished with child-end function."}; diff --git a/tests/sampler/parfor.hpp b/tests/sampler/parfor.hpp new file mode 100644 index 000000000..0f1649456 --- /dev/null +++ b/tests/sampler/parfor.hpp @@ -0,0 +1,19 @@ +#pragma once + +struct Tester { + template + explicit Tester(const execution_space& space) { + //! Explicitly launch a kernel with a name, and run it 15 times with kernel + //! logger. Use a periodic sampling with skip rate 5. This should print + //! out 2 invocations, and there is a single matcher with a regular + //! expression to check this. + + for (int iter = 0; iter < 15; iter++) { + Kokkos::parallel_for("named kernel", + Kokkos::RangePolicy(space, 0, 1), + *this); + } + } + + KOKKOS_FUNCTION void operator()(const int) const {} +}; diff --git a/tests/sampler/parreduce.hpp b/tests/sampler/parreduce.hpp new file mode 100644 index 000000000..2bc43a154 --- /dev/null +++ b/tests/sampler/parreduce.hpp @@ -0,0 +1,21 @@ +#pragma once + +struct Tester { + template + explicit Tester(const execution_space& space) { + //! Explicitly launch a kernel with a name, and run it 15 times with kernel + //! logger. Use a periodic sampling with skip rate 5. This should print + //! out 2 invocations, and there is a single matcher with a regular + //! expression to check this. 
+ + long int sum; + for (int iter = 0; iter < 15; iter++) { + sum = 0; + Kokkos::parallel_reduce("named kernel reduce", + Kokkos::RangePolicy(space, 0, 1), + *this, sum); + } + } + + KOKKOS_FUNCTION void operator()(const int, long int&) const {} +}; diff --git a/tests/sampler/parscan.hpp b/tests/sampler/parscan.hpp new file mode 100644 index 000000000..082191edf --- /dev/null +++ b/tests/sampler/parscan.hpp @@ -0,0 +1,21 @@ +#pragma once + +struct Tester { + template + explicit Tester(const execution_space& space) { + //! Explicitly launch a kernel with a name, and run it 15 times with kernel + //! logger. Use a periodic sampling with skip rate 5. This should print + //! out 2 invocations, and there is a single matcher with a regular + //! expression to check this. + + long int N = 1024; + long int result; + + for (int iter = 0; iter < 15; iter++) { + result = 0; + Kokkos::parallel_scan("named kernel scan", N, *this, result); + } + } + + KOKKOS_FUNCTION void operator()(const int, long int&, bool) const {} +}; diff --git a/tests/sampler/test_parfor.cpp b/tests/sampler/test_parfor.cpp new file mode 100644 index 000000000..98da58edb --- /dev/null +++ b/tests/sampler/test_parfor.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using ::testing::Contains; +using ::testing::HasSubstr; +using ::testing::Not; + +#include "parfor.hpp" +#include "matchersSkip.hpp" + +/** + * @test This test checks that the tool effectively samples. + * + */ + +TEST(SamplerSkipTest, parfor) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. 
+ std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. + for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), HasSubstr(matcher)); + } + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 1 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 2 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 3 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 4 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 5 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 7 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 8 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 9 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 10 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 11 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 13 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 14 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 15 calling"))); + + int occurrences = 0; + std::string::size_type pos = 0; + std::string samplerTestOutput(output.str()); + std::string target("calling child-begin function"); + while ((pos = samplerTestOutput.find(target, pos)) != std::string::npos) { + ++occurrences; + pos += target.length(); + } + EXPECT_EQ(occurrences, 2); + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: FATAL: No child library of " + "sampler utility library to call"))); + + EXPECT_THAT(output.str(), + Not(HasSubstr("KokkosP: FATAL: Kokkos Tools Programming " + "Interface's tool-invoked Fence is NULL!"))); +} \ No newline at end of file diff --git a/tests/sampler/test_parfor_prob.cpp b/tests/sampler/test_parfor_prob.cpp new file mode 100644 index 000000000..7c43f3492 --- /dev/null +++ b/tests/sampler/test_parfor_prob.cpp @@ -0,0 +1,72 @@ + 
+#include +#include +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using ::testing::Contains; +using ::testing::HasSubstr; +using ::testing::Not; + +#include "parfor.hpp" +#include "matchersProb.hpp" + +/** + * @test This test checks that the tool effectively samples. + * + */ + +TEST(SamplerProbTest, parfor) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. + std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. + for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), HasSubstr(matcher)); + } + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 1 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 2 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 7 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 9 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 10 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 11 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 15 calling"))); + + int occurrences = 0; + std::string::size_type pos = 0; + std::string samplerTestOutput(output.str()); + std::string target("calling child-begin function"); + while ((pos = samplerTestOutput.find(target, pos)) != std::string::npos) { + ++occurrences; + pos += target.length(); + } + EXPECT_EQ(occurrences, 8); + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: FATAL: No child library of " + "sampler utility library to call"))); + + EXPECT_THAT(output.str(), + Not(HasSubstr("KokkosP: 
FATAL: Kokkos Tools Programming " + "Interface's tool-invoked Fence is NULL!"))); +} diff --git a/tests/sampler/test_parreduce.cpp b/tests/sampler/test_parreduce.cpp new file mode 100644 index 000000000..fc9fdff3e --- /dev/null +++ b/tests/sampler/test_parreduce.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using ::testing::HasSubstr; +using ::testing::Not; + +#include "parreduce.hpp" +#include "matchersSkip.hpp" + +/* + * @test This test checks that the sampling utility properly samples. + * + */ + +TEST(SamplerSkipTest, parreduce) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. + std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. 
+ for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), HasSubstr(matcher)); + } + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 1 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 2 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 3 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 4 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 5 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 7 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 8 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 9 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 10 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 11 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 13 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 14 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 15 calling"))); + + int occurrences = 0; + std::string::size_type pos = 0; + std::string samplerTestOutput(output.str()); + std::string target("calling child-begin function"); + while ((pos = samplerTestOutput.find(target, pos)) != std::string::npos) { + ++occurrences; + pos += target.length(); + } + + EXPECT_EQ(occurrences, 2); + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: FATAL: No child library of " + "sampler utility library to call"))); + + EXPECT_THAT(output.str(), + Not(HasSubstr("KokkosP: FATAL: Kokkos Tools Programming " + "Interface's tool-invoked Fence is NULL!"))); +} diff --git a/tests/sampler/test_parreduce_prob.cpp b/tests/sampler/test_parreduce_prob.cpp new file mode 100644 index 000000000..0a0130d5d --- /dev/null +++ b/tests/sampler/test_parreduce_prob.cpp @@ -0,0 +1,72 @@ + +#include +#include +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using 
::testing::Contains; +using ::testing::HasSubstr; +using ::testing::Not; + +#include "parreduce.hpp" +#include "matchersProb.hpp" + +/** + * @test This test checks that the tool effectively samples. + * + */ + +TEST(SamplerProbTest, parreduce) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. + std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. + for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), HasSubstr(matcher)); + } + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 1 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 2 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 7 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 9 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 10 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 11 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 15 calling"))); + + int occurrences = 0; + std::string::size_type pos = 0; + std::string samplerTestOutput(output.str()); + std::string target("calling child-begin function"); + while ((pos = samplerTestOutput.find(target, pos)) != std::string::npos) { + ++occurrences; + pos += target.length(); + } + EXPECT_EQ(occurrences, 8); + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: FATAL: No child library of " + "sampler utility library to call"))); + + EXPECT_THAT(output.str(), + Not(HasSubstr("KokkosP: FATAL: Kokkos Tools Programming " + "Interface's tool-invoked Fence is NULL!"))); +} diff --git a/tests/sampler/test_parscan.cpp 
b/tests/sampler/test_parscan.cpp new file mode 100644 index 000000000..2be60c8cd --- /dev/null +++ b/tests/sampler/test_parscan.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using ::testing::HasSubstr; +using ::testing::Not; + +#include "parscan.hpp" +#include "matchersSkip.hpp" + +/** + * @test This test checks that the tool effectively samples. + * + + */ + +TEST(SamplerSkipTest, parscan) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. + std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. 
+ for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), HasSubstr(matcher)); + } + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 1 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 2 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 3 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 4 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 5 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 7 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 8 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 9 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 10 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 11 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 13 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 14 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 15 calling"))); + + int occurrences = 0; + std::string::size_type pos = 0; + std::string samplerTestOutput(output.str()); + std::string target("calling child-begin function"); + while ((pos = samplerTestOutput.find(target, pos)) != std::string::npos) { + ++occurrences; + pos += target.length(); + } + + EXPECT_EQ(occurrences, 2); + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: FATAL: No child library of " + "sampler utility library to call"))); + + EXPECT_THAT(output.str(), + Not(HasSubstr("KokkosP: FATAL: Kokkos Tools Programming " + "Interface's tool-invoked Fence is NULL!"))); +} diff --git a/tests/sampler/test_parscan_prob.cpp b/tests/sampler/test_parscan_prob.cpp new file mode 100644 index 000000000..26e274f2c --- /dev/null +++ b/tests/sampler/test_parscan_prob.cpp @@ -0,0 +1,72 @@ + +#include +#include +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +using 
::testing::Contains; +using ::testing::HasSubstr; +using ::testing::Not; + +#include "parscan.hpp" +#include "matchersProb.hpp" + +/** + * @test This test checks that the tool effectively samples. + * + */ + +TEST(SamplerProbTest, parscan) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. + std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. + for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), HasSubstr(matcher)); + } + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 1 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 2 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 7 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 9 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 10 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 11 calling"))); + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: sample 15 calling"))); + + int occurrences = 0; + std::string::size_type pos = 0; + std::string samplerTestOutput(output.str()); + std::string target("calling child-begin function"); + while ((pos = samplerTestOutput.find(target, pos)) != std::string::npos) { + ++occurrences; + pos += target.length(); + } + EXPECT_EQ(occurrences, 8); + + EXPECT_THAT(output.str(), Not(HasSubstr("KokkosP: FATAL: No child library of " + "sampler utility library to call"))); + + EXPECT_THAT(output.str(), + Not(HasSubstr("KokkosP: FATAL: Kokkos Tools Programming " + "Interface's tool-invoked Fence is NULL!"))); +} diff --git 
a/tests/space-time-stack/CMakeLists.txt b/tests/space-time-stack/CMakeLists.txt new file mode 100644 index 000000000..e16b0448c --- /dev/null +++ b/tests/space-time-stack/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_executable_and_test( + TARGET_NAME test_space_time_stack_demangling + SOURCE_FILE test_demangling.cpp + KOKKOS_TOOLS_LIBS kp_space_time_stack +) diff --git a/tests/space-time-stack/test_demangling.cpp b/tests/space-time-stack/test_demangling.cpp new file mode 100644 index 000000000..27ca95dd7 --- /dev/null +++ b/tests/space-time-stack/test_demangling.cpp @@ -0,0 +1,80 @@ +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "Kokkos_Core.hpp" + +struct Tester { + struct TagNamed {}; + struct TagUnnamed {}; + + template + explicit Tester(const execution_space& space) { + //! Explicitly launch a kernel with a name and no tag. + Kokkos::parallel_for("named kernel", + Kokkos::RangePolicy(space, 0, 1), + *this); + + //! Explicitly launch a kernel with a name and a tag. + Kokkos::parallel_for( + "named kernel with tag", + Kokkos::RangePolicy(space, 0, 1), *this); + + //! Explicitly launch a kernel with no name and no tag. + Kokkos::parallel_for(Kokkos::RangePolicy(space, 0, 1), + *this); + + //! Explicitly launch a kernel with no name and a tag. + Kokkos::parallel_for( + Kokkos::RangePolicy(space, 0, 1), *this); + } + + KOKKOS_FUNCTION void operator()(const int) const {} + + template + KOKKOS_FUNCTION void operator()(const TagType, const int) const {} +}; + +static const std::vector matchers{ + /// A kernel with a given name appears with the given name, no matter + /// if a tag was given. + "[0-9.e]+ sec [0-9.]+% 100.0% 0.0% ------ 1 named kernel \\[for\\]", + "[0-9.e]+ sec [0-9.]+% 100.0% 0.0% ------ 1 named kernel with tag " + "\\[for\\]", + //! A kernel with no name and no tag appears with a demangled name. + "[0-9.e]+ sec [0-9.]+% 100.0% 0.0% ------ 1 Tester \\[for\\]\n", + //! 
A kernel with no name and a tag appears with a demangled name. + "[0-9.e]+ sec [0-9.]+% 100.0% 0.0% ------ 1 Tester/Tester::TagUnnamed " + "\\[for\\]"}; + +/** + * @test This test checks that the tool effectively uses + * the demangling helpers. + */ +TEST(SpaceTimeStackTest, demangling) { + //! Initialize @c Kokkos. + Kokkos::initialize(); + + //! Redirect output for later analysis. + std::cout.flush(); + std::ostringstream output; + std::streambuf* coutbuf = std::cout.rdbuf(output.rdbuf()); + + //! Run tests. @todo Replace this with Google Test. + Tester tester(Kokkos::DefaultExecutionSpace{}); + + //! Finalize @c Kokkos. + Kokkos::finalize(); + + //! Restore output buffer. + std::cout.flush(); + std::cout.rdbuf(coutbuf); + std::cout << output.str() << std::endl; + + //! Analyze test output. + for (const auto& matcher : matchers) { + EXPECT_THAT(output.str(), ::testing::ContainsRegex(matcher)); + } +}