From 802493285355148eb6049722889ec55b10547afd Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 4 Sep 2024 09:21:56 -0400 Subject: [PATCH 1/8] Set version number to 4.4.1 --- CMakeLists.txt | 2 +- Makefile.kokkos | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 054de2c1dae..736cbac218c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 4) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/Makefile.kokkos b/Makefile.kokkos index 15f24f30732..ccb568a553c 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 4 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial From bbc6f2fd1eded615c5700373c82a2cc5339d0dbc Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 7 Aug 2024 07:48:03 -0400 Subject: [PATCH 2/8] Improve GH action to produce release artifacts --- .github/workflows/releases.yml | 36 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index f0bb9b0b199..7f1186ef6a3 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -13,11 +13,11 @@ jobs: hashes: ${{ steps.hash.outputs.hashes }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - name: Build artifacts run: | - git archive -o kokkos-${{ github.ref_name }}.zip HEAD - git archive -o kokkos-${{ github.ref_name }}.tar.gz HEAD + git archive --prefix=kokkos-${{ github.ref_name }}/ -o kokkos-${{ github.ref_name }}.zip HEAD + git archive --prefix=kokkos-${{ github.ref_name }}/ -o kokkos-${{ github.ref_name }}.tar.gz HEAD - name: Generate hashes shell: bash @@ -25,21 +25,14 @@ jobs: run: | # sha256sum generates sha256 hash for all artifacts. # base64 -w0 encodes to base64 and outputs on a single line. - echo "hashes=$(sha256sum kokkos-${{ github.ref_name }}.zip kokkos-${{ github.ref_name }}.tar.gz | base64 -w0)" >> "$GITHUB_OUTPUT" + sha256sum kokkos-${{ github.ref_name }}.zip kokkos-${{ github.ref_name }}.tar.gz > kokkos-${{ github.ref_name }}-SHA-256.txt + echo "hashes=$(base64 -w0 kokkos-${{ github.ref_name }}-SHA-256.txt)" >> "$GITHUB_OUTPUT" - - name: Upload source code (zip) - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + - name: Upload artifacts + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: kokkos-${{ github.ref_name }}.zip - path: kokkos-${{ github.ref_name }}.zip - if-no-files-found: error - retention-days: 5 - - - name: Upload source code (tar.gz) - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 - with: - name: kokkos-${{ github.ref_name }}.tar.gz - path: kokkos-${{ github.ref_name }}.tar.gz + name: release-artifacts + path: kokkos-${{ github.ref_name }}* if-no-files-found: error retention-days: 5 @@ -65,19 +58,14 @@ jobs: runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/') steps: - - name: Download kokkos-${{ github.ref_name }}.zip + - name: Download artifacts uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: kokkos-${{ github.ref_name }}.zip - - - name: Download kokkos-${{ github.ref_name }}.tar.gz - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: kokkos-${{ github.ref_name }}.tar.gz - + name: release-artifacts - name: Upload assets uses: softprops/action-gh-release@c062e08bd532815e2082a85e87e3ef29c3e6d191 # v2.0.8 with: files: | kokkos-${{ github.ref_name }}.zip kokkos-${{ github.ref_name }}.tar.gz + kokkos-${{ github.ref_name }}-SHA-256.txt From 9d8cf2bf9e84677bb86b7f20018cd26684319479 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Tue, 3 Sep 2024 16:21:35 -0400 Subject: [PATCH 3/8] Reduce visibility of `g_openmp_hardware_max_threads` symbol to avoid trouble with shared libabries --- .../continuous-integration-workflow.yml | 1 + .jenkins | 1 + core/src/OpenMP/Kokkos_OpenMP.cpp | 2 +- core/src/OpenMP/Kokkos_OpenMP_Instance.cpp | 39 +++++++++++-------- core/src/OpenMP/Kokkos_OpenMP_Instance.hpp | 4 +- core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp | 3 +- 6 files changed, 30 insertions(+), 20 deletions(-) diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index 9c7b7585d06..0f2c67a0a11 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -113,6 +113,7 @@ jobs: cmake -B builddir \ -DCMAKE_INSTALL_PREFIX=/usr \ ${{ matrix.clang-tidy }} \ + -DBUILD_SHARED_LIBS=ON \ -Ddesul_ROOT=/usr/desul-install/ \ -DKokkos_ENABLE_DESUL_ATOMICS_EXTERNAL=ON \ -DKokkos_ENABLE_HWLOC=ON \ diff --git a/.jenkins b/.jenkins index 0393ff06fb5..1635a69f298 100644 --- a/.jenkins +++ b/.jenkins @@ -461,6 +461,7 @@ pipeline { -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_LIBDL=OFF \ + -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_IMPL_MDSPAN=OFF \ -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF \ .. && \ diff --git a/core/src/OpenMP/Kokkos_OpenMP.cpp b/core/src/OpenMP/Kokkos_OpenMP.cpp index 82199d0d72d..0f65ba43a0d 100644 --- a/core/src/OpenMP/Kokkos_OpenMP.cpp +++ b/core/src/OpenMP/Kokkos_OpenMP.cpp @@ -113,7 +113,7 @@ int OpenMP::impl_thread_pool_size() const noexcept { } int OpenMP::impl_max_hardware_threads() noexcept { - return Impl::g_openmp_hardware_max_threads; + return Impl::OpenMPInternal::max_hardware_threads(); } namespace Impl { diff --git a/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp b/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp index 0f4c7d60524..473a322eecf 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp @@ -31,12 +31,20 @@ #include #include +namespace { +int g_openmp_hardware_max_threads = 1; +} + namespace Kokkos { namespace Impl { std::vector OpenMPInternal::all_instances; std::mutex OpenMPInternal::all_instances_mutex; +int OpenMPInternal::max_hardware_threads() noexcept { + return g_openmp_hardware_max_threads; +} + void OpenMPInternal::clear_thread_data() { const size_t member_bytes = sizeof(int64_t) * @@ -188,9 +196,9 @@ void OpenMPInternal::initialize(int thread_count) { // Before any other call to OMP query the maximum number of threads // and save the value for re-initialization unit testing. - Impl::g_openmp_hardware_max_threads = get_current_max_threads(); + g_openmp_hardware_max_threads = get_current_max_threads(); - int process_num_threads = Impl::g_openmp_hardware_max_threads; + int process_num_threads = g_openmp_hardware_max_threads; if (Kokkos::hwloc::available()) { process_num_threads = Kokkos::hwloc::get_available_numa_count() * @@ -203,11 +211,11 @@ void OpenMPInternal::initialize(int thread_count) { // process_num_threads if thread_count > 0, set // g_openmp_hardware_max_threads to thread_count if (thread_count < 0) { - thread_count = Impl::g_openmp_hardware_max_threads; + thread_count = g_openmp_hardware_max_threads; } else if (thread_count == 0) { - if (Impl::g_openmp_hardware_max_threads != process_num_threads) { - Impl::g_openmp_hardware_max_threads = process_num_threads; - omp_set_num_threads(Impl::g_openmp_hardware_max_threads); + if (g_openmp_hardware_max_threads != process_num_threads) { + g_openmp_hardware_max_threads = process_num_threads; + omp_set_num_threads(g_openmp_hardware_max_threads); } } else { if (Kokkos::show_warnings() && thread_count > process_num_threads) { @@ -218,16 +226,16 @@ void OpenMPInternal::initialize(int thread_count) { << ", requested thread : " << std::setw(3) << thread_count << std::endl; } - Impl::g_openmp_hardware_max_threads = thread_count; - omp_set_num_threads(Impl::g_openmp_hardware_max_threads); + g_openmp_hardware_max_threads = thread_count; + omp_set_num_threads(g_openmp_hardware_max_threads); } // setup thread local -#pragma omp parallel num_threads(Impl::g_openmp_hardware_max_threads) +#pragma omp parallel num_threads(g_openmp_hardware_max_threads) { Impl::SharedAllocationRecord::tracking_enable(); } auto &instance = OpenMPInternal::singleton(); - instance.m_pool_size = Impl::g_openmp_hardware_max_threads; + instance.m_pool_size = g_openmp_hardware_max_threads; // New, unified host thread team data: { @@ -272,10 +280,9 @@ void OpenMPInternal::finalize() { if (this == &singleton()) { auto const &instance = singleton(); // Silence Cuda Warning - const int nthreads = - instance.m_pool_size <= Impl::g_openmp_hardware_max_threads - ? Impl::g_openmp_hardware_max_threads - : instance.m_pool_size; + const int nthreads = instance.m_pool_size <= g_openmp_hardware_max_threads + ? g_openmp_hardware_max_threads + : instance.m_pool_size; (void)nthreads; #pragma omp parallel num_threads(nthreads) @@ -284,7 +291,7 @@ void OpenMPInternal::finalize() { // allow main thread to track Impl::SharedAllocationRecord::tracking_enable(); - Impl::g_openmp_hardware_max_threads = 1; + g_openmp_hardware_max_threads = 1; } m_initialized = false; @@ -307,7 +314,7 @@ void OpenMPInternal::print_configuration(std::ostream &s) const { if (m_initialized) { const int numa_count = 1; - const int core_per_numa = Impl::g_openmp_hardware_max_threads; + const int core_per_numa = g_openmp_hardware_max_threads; const int thread_per_core = 1; s << " thread_pool_topology[ " << numa_count << " x " << core_per_numa diff --git a/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp b/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp index f4a0d3e2012..2aed723b18f 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp @@ -47,8 +47,6 @@ namespace Impl { class OpenMPInternal; -inline int g_openmp_hardware_max_threads = 1; - struct OpenMPTraits { static constexpr int MAX_THREAD_COUNT = 512; }; @@ -86,6 +84,8 @@ class OpenMPInternal { void clear_thread_data(); + static int max_hardware_threads() noexcept; + int thread_pool_size() const { return m_pool_size; } void resize_thread_data(size_t pool_reduce_bytes, size_t team_reduce_bytes, diff --git a/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp b/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp index a37e1758a26..5937c093ba1 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp +++ b/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp @@ -105,7 +105,8 @@ class UniqueToken { /// \brief upper bound for acquired values, i.e. 0 <= value < size() KOKKOS_INLINE_FUNCTION int size() const noexcept { - KOKKOS_IF_ON_HOST((return Kokkos::Impl::g_openmp_hardware_max_threads;)) + KOKKOS_IF_ON_HOST( + (return Kokkos::Impl::OpenMPInternal::max_hardware_threads();)) KOKKOS_IF_ON_DEVICE((return 0;)) } From 9e5a0c9e3a200608bd78d7dbe79981273f11e4fe Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 4 Sep 2024 09:51:51 -0400 Subject: [PATCH 4/8] Introduce new SequentialHostInit view allocation property --- core/src/Kokkos_View.hpp | 2 + core/src/View/Kokkos_ViewAlloc.hpp | 45 +++++++++++ core/src/impl/Kokkos_ViewCtor.hpp | 25 ++++-- core/src/impl/Kokkos_ViewMapping.hpp | 10 ++- core/unit_test/TestViewOfViews.hpp | 116 ++++++++++++++++++++------- 5 files changed, 157 insertions(+), 41 deletions(-) diff --git a/core/src/Kokkos_View.hpp b/core/src/Kokkos_View.hpp index 820a40a5f55..04d1fcf1518 100644 --- a/core/src/Kokkos_View.hpp +++ b/core/src/Kokkos_View.hpp @@ -571,6 +571,8 @@ inline constexpr Kokkos::ALL_t ALL{}; #pragma omp end declare target #endif +inline constexpr Kokkos::Impl::SequentialHostInit_t SequentialHostInit{}; + inline constexpr Kokkos::Impl::WithoutInitializing_t WithoutInitializing{}; inline constexpr Kokkos::Impl::AllowPadding_t AllowPadding{}; diff --git a/core/src/View/Kokkos_ViewAlloc.hpp b/core/src/View/Kokkos_ViewAlloc.hpp index 95cb6f619cc..1ade75692f1 100644 --- a/core/src/View/Kokkos_ViewAlloc.hpp +++ b/core/src/View/Kokkos_ViewAlloc.hpp @@ -313,6 +313,51 @@ struct ViewValueFunctor { void destroy_shared_allocation() {} }; + +template +struct ViewValueFunctorSequentialHostInit { + using ExecSpace = typename DeviceType::execution_space; + using MemSpace = typename DeviceType::memory_space; + static_assert(SpaceAccessibility::accessible); + + ValueType* ptr; + size_t n; + + ViewValueFunctorSequentialHostInit() = default; + + ViewValueFunctorSequentialHostInit(ExecSpace const& /*arg_space*/, + ValueType* const arg_ptr, + size_t const arg_n, + std::string /*arg_name*/) + : ptr(arg_ptr), n(arg_n) {} + + ViewValueFunctorSequentialHostInit(ValueType* const arg_ptr, + size_t const arg_n, + std::string /*arg_name*/) + : ptr(arg_ptr), n(arg_n) {} + + void construct_shared_allocation() { + if constexpr (std::is_trivial_v) { + // value-initialization is equivalent to filling with zeros + std::memset(static_cast(ptr), 0, n * sizeof(ValueType)); + } else { + for (size_t i = 0; i < n; ++i) { + new (ptr + i) ValueType(); + } + } + } + + void destroy_shared_allocation() { + if constexpr (std::is_trivially_destructible_v) { + // do nothing, don't bother calling the destructor + } else { + for (size_t i = 0; i < n; ++i) { + (ptr + i)->~ValueType(); + } + } + } +}; + } // namespace Kokkos::Impl #endif // KOKKOS_VIEW_ALLOC_HPP diff --git a/core/src/impl/Kokkos_ViewCtor.hpp b/core/src/impl/Kokkos_ViewCtor.hpp index e1b8ba86a5b..379180ae643 100644 --- a/core/src/impl/Kokkos_ViewCtor.hpp +++ b/core/src/impl/Kokkos_ViewCtor.hpp @@ -23,12 +23,16 @@ namespace Kokkos { namespace Impl { +struct SequentialHostInit_t {}; struct WithoutInitializing_t {}; struct AllowPadding_t {}; template struct is_view_ctor_property : public std::false_type {}; +template <> +struct is_view_ctor_property : public std::true_type {}; + template <> struct is_view_ctor_property : public std::true_type {}; @@ -84,10 +88,10 @@ struct ViewCtorProp> { /* Property flags have constexpr value */ template -struct ViewCtorProp< - std::enable_if_t::value || - std::is_same::value>, - P> { +struct ViewCtorProp || + std::is_same_v || + std::is_same_v>, + P> { ViewCtorProp() = default; ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; @@ -199,6 +203,11 @@ struct ViewCtorProp : public ViewCtorProp... { Kokkos::Impl::has_type::value; static constexpr bool initialize = !Kokkos::Impl::has_type::value; + static constexpr bool sequential_host_init = + Kokkos::Impl::has_type::value; + static_assert(initialize || !sequential_host_init, + "Incompatible WithoutInitializing and SequentialHostInit view " + "alloc properties"); using memory_space = typename var_memory_space::type; using execution_space = typename var_execution_space::type; @@ -251,7 +260,9 @@ auto with_properties_if_unset(const ViewCtorProp &view_ctor_prop, (is_view_label::value && !ViewCtorProp::has_label) || (std::is_same_v && - ViewCtorProp::initialize)) { + ViewCtorProp::initialize) || + (std::is_same_v && + !ViewCtorProp::sequential_host_init)) { using NewViewCtorProp = ViewCtorProp; NewViewCtorProp new_view_ctor_prop(view_ctor_prop); static_cast &>(new_view_ctor_prop).value = @@ -299,7 +310,9 @@ struct WithPropertiesIfUnset, Property, Properties...> { (is_view_label::value && !ViewCtorProp::has_label) || (std::is_same_v && - ViewCtorProp::initialize)) { + ViewCtorProp::initialize) || + (std::is_same_v && + !ViewCtorProp::sequential_host_init)) { using NewViewCtorProp = ViewCtorProp; NewViewCtorProp new_view_ctor_prop(view_ctor_prop); static_cast &>(new_view_ctor_prop).value = diff --git a/core/src/impl/Kokkos_ViewMapping.hpp b/core/src/impl/Kokkos_ViewMapping.hpp index 8919dccdb7a..10aaa63b7c8 100644 --- a/core/src/impl/Kokkos_ViewMapping.hpp +++ b/core/src/impl/Kokkos_ViewMapping.hpp @@ -2825,10 +2825,12 @@ class ViewMapping< using memory_space = typename Traits::memory_space; static_assert( SpaceAccessibility::accessible); - using value_type = typename Traits::value_type; - using functor_type = - ViewValueFunctor, - value_type>; + using device_type = Kokkos::Device; + using value_type = typename Traits::value_type; + using functor_type = std::conditional_t< + alloc_prop::sequential_host_init, + ViewValueFunctorSequentialHostInit, + ViewValueFunctor>; using record_type = Kokkos::Impl::SharedAllocationRecord; diff --git a/core/unit_test/TestViewOfViews.hpp b/core/unit_test/TestViewOfViews.hpp index a87c829bb73..1d53bca336d 100644 --- a/core/unit_test/TestViewOfViews.hpp +++ b/core/unit_test/TestViewOfViews.hpp @@ -20,7 +20,7 @@ namespace { -// User-defined type with a View data member +// User-defined types with a View data member template class S { V v_; @@ -28,48 +28,102 @@ class S { public: template S(std::string label, Extents... extents) : v_(std::move(label), extents...) {} - S() = default; + KOKKOS_DEFAULTED_FUNCTION S() = default; }; template -void test_view_of_views() { +class N { // not default constructible + V v_; + + public: + template + N(std::string label, Extents... extents) : v_(std::move(label), extents...) {} +}; + +template +class H { // constructible and destructible only from on the host side + V v_; + + public: + template + H(std::string label, Extents... extents) : v_(std::move(label), extents...) {} + H() {} + ~H() {} +}; + +template +void test_view_of_views_default() { + // assigning a default-constructed view to destruct the inner objects using VoV = Kokkos::View; - { // assigning a default-constructed view to destruct the inner objects - VoV vov("vov", 2, 3); - V a("a"); - V b("b"); - vov(0, 0) = a; - vov(1, 0) = a; - vov(0, 1) = b; + VoV vov("vov", 2, 3); + V a("a"); + V b("b"); + vov(0, 0) = a; + vov(1, 0) = a; + vov(0, 1) = b; #ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND - vov(0, 0) = V(); - vov(1, 0) = V(); - vov(0, 1) = V(); + vov(0, 0) = V(); + vov(1, 0) = V(); + vov(0, 1) = V(); #endif - } - { // using placement new to construct the inner objects and explicitly - // calling the destructor - VoV vov(Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); - V a("a"); - V b("b"); - new (&vov(0, 0)) V(a); - new (&vov(1, 0)) V(a); - new (&vov(0, 1)) V(b); +} + +template +void test_view_of_views_without_initializing() { + // using placement new to construct the inner objects and explicitly + // calling the destructor + using VoV = Kokkos::View; + VoV vov(Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); + V a("a"); + V b("b"); + new (&vov(0, 0)) V(a); + new (&vov(1, 0)) V(a); + new (&vov(0, 1)) V(b); #ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND - vov(0, 0).~V(); - vov(1, 0).~V(); - vov(0, 1).~V(); + vov(0, 0).~V(); + vov(1, 0).~V(); + vov(0, 1).~V(); #else - // leaks memory + // leaks memory #endif - } } -TEST(TEST_CATEGORY, view_of_views) { - test_view_of_views>(); - test_view_of_views>(); +template +void test_view_of_views_sequential_host_init() { + // inner views value-initialized sequentially on the host, and also + // sequentially destructed on the host, without the need to cleanup + using VoV = Kokkos::View; + VoV vov(Kokkos::view_alloc("vov", Kokkos::SequentialHostInit), 2, 3); + V a("a"); + V b("b"); + vov(0, 0) = a; + vov(1, 0) = a; + vov(0, 1) = b; +} + +TEST(TEST_CATEGORY, view_of_views_default) { + test_view_of_views_default>(); + test_view_of_views_default>(); // User-defined type with View data member - test_view_of_views>>(); + test_view_of_views_default>>(); +} + +TEST(TEST_CATEGORY, view_of_views_without_initializing) { + test_view_of_views_without_initializing>(); + test_view_of_views_without_initializing< + S>>(); + test_view_of_views_without_initializing< + N>>(); + test_view_of_views_without_initializing< + H>>(); +} + +TEST(TEST_CATEGORY, test_view_of_views_sequential_host_init) { + test_view_of_views_sequential_host_init>(); + test_view_of_views_sequential_host_init< + S>>(); + test_view_of_views_sequential_host_init< + H>>(); } } // namespace From 34650996b32d94ee32b246a688468d539475acb8 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 4 Sep 2024 10:39:27 -0400 Subject: [PATCH 5/8] Add changelog for 4.4.1 --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78225f9e6c2..db7accd5ed8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # CHANGELOG +## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.4.01) + +### Feature: +* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229) + +### Bug Fix +* Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284) + ## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00) From 9b898bb6fa76e078dd162e809035b8dd79d76d97 Mon Sep 17 00:00:00 2001 From: Christian Robert Trott Date: Wed, 4 Sep 2024 13:35:51 -0700 Subject: [PATCH 6/8] Squashed CUDA Unified memory stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Cédric Chevalier Co-authored-by: Patrick Diehl Co-authored-by: Daniel Arndt Co-authored-by: Damien L-G --- cmake/KokkosCore_config.h.in | 1 + cmake/kokkos_enable_options.cmake | 4 +- .../unit_tests/TestWithoutInitializing.hpp | 12 ++++++ core/src/Cuda/Kokkos_CudaSpace.cpp | 39 +++++++++++++++++-- core/src/Cuda/Kokkos_CudaSpace.hpp | 23 ++++++++++- core/src/Cuda/Kokkos_Cuda_Instance.cpp | 20 ++++++++++ core/unit_test/cuda/TestCuda_Spaces.cpp | 16 ++++++++ 7 files changed, 110 insertions(+), 5 deletions(-) diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 7997aa3707c..a93007ff83f 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -37,6 +37,7 @@ #cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated #cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC +#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS #cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY diff --git a/cmake/kokkos_enable_options.cmake b/cmake/kokkos_enable_options.cmake index b900c4a232e..53764b0c684 100644 --- a/cmake/kokkos_enable_options.cmake +++ b/cmake/kokkos_enable_options.cmake @@ -48,6 +48,8 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda # resolved but we keep the option around a bit longer to be safe. KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)") KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler") +KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA") + KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" ) KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" ) KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP") @@ -135,7 +137,7 @@ FUNCTION(check_device_specific_options) ENDIF() ENDFUNCTION() -CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC) +CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC IMPL_CUDA_UNIFIED_MEMORY) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH) diff --git a/containers/unit_tests/TestWithoutInitializing.hpp b/containers/unit_tests/TestWithoutInitializing.hpp index 7201cd402a9..e8558628dc8 100644 --- a/containers/unit_tests/TestWithoutInitializing.hpp +++ b/containers/unit_tests/TestWithoutInitializing.hpp @@ -37,6 +37,17 @@ #endif ///@} +/// Some tests are skipped for unified memory space +#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) +#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE \ + if constexpr (std::is_same_v) \ + GTEST_SKIP() << "skipping since unified memory requires additional " \ + "fences"; +#else +#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE +#endif + TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) { using namespace Kokkos::Test::Tools; listen_tool_events(Config::DisableAll(), Config::EnableKernels()); @@ -657,6 +668,7 @@ TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview) { TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynamicview) { GTEST_SKIP_IF_CUDAUVM_MEMORY_SPACE + GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE using namespace Kokkos::Test::Tools; listen_tool_events(Config::DisableAll(), Config::EnableKernels(), diff --git a/core/src/Cuda/Kokkos_CudaSpace.cpp b/core/src/Cuda/Kokkos_CudaSpace.cpp index 75318aff778..6ae24022c8f 100644 --- a/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -31,7 +31,6 @@ #include #include -//#include #include #include @@ -178,6 +177,29 @@ void *impl_allocate_common(const int device_id, cudaError_t error_code = cudaSuccess; #ifndef CUDART_VERSION #error CUDART_VERSION undefined! +#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + // This is intended for Grace-Hopper (and future unified memory architectures) + // The idea is to use host allocator and then advise to keep it in HBM on the + // device, but that requires CUDA 12.2 + static_assert(CUDART_VERSION >= 12020, + "CUDA runtime version >=12.2 required when " + "Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY is set. " + "Please update your CUDA runtime version or " + "reconfigure with " + "-D Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF"); + if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr + error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal); + if (error_code == cudaSuccess) { + // One would think cudaMemLocation{device_id, + // cudaMemLocationTypeDevice} would work but it doesn't. I.e. the order of + // members doesn't seem to be defined. + cudaMemLocation loc; + loc.id = device_id; + loc.type = cudaMemLocationTypeDevice; + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2( + ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc)); + } + } #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) if (arg_alloc_size >= memory_threshold_g) { error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream); @@ -190,9 +212,13 @@ void *impl_allocate_common(const int device_id, "Kokkos::Cuda: backend fence after async malloc"); } } - } else + } else { + error_code = cudaMalloc(&ptr, arg_alloc_size); + } +#else + error_code = cudaMalloc(&ptr, arg_alloc_size); #endif - { error_code = cudaMalloc(&ptr, arg_alloc_size); } + if (error_code != cudaSuccess) { // TODO tag as unlikely branch // This is the only way to clear the last error, which // we should do here since we're turning it into an @@ -326,6 +352,9 @@ void CudaSpace::impl_deallocate( } #ifndef CUDART_VERSION #error CUDART_VERSION undefined! +#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) if (arg_alloc_size >= memory_threshold_g) { Impl::cuda_device_synchronize( @@ -436,8 +465,12 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes, #include +#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::CudaSpace); +#else +KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::CudaSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::CudaUVMSpace); KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( diff --git a/core/src/Cuda/Kokkos_CudaSpace.hpp b/core/src/Cuda/Kokkos_CudaSpace.hpp index 0e20193e8b4..e1d062d72d5 100644 --- a/core/src/Cuda/Kokkos_CudaSpace.hpp +++ b/core/src/Cuda/Kokkos_CudaSpace.hpp @@ -88,6 +88,19 @@ class CudaSpace { void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; +#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + template + void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const { + return allocate(arg_alloc_size); + } + template + void* allocate(const ExecutionSpace&, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const { + return allocate(arg_label, arg_alloc_size, arg_logical_size); + } +#endif + /**\brief Deallocate untracked memory in the cuda space */ void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; void deallocate(const char* arg_label, void* const arg_alloc_ptr, @@ -337,7 +350,11 @@ static_assert( template <> struct MemorySpaceAccess { enum : bool { assignable = false }; - enum : bool { accessible = false }; +#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + enum : bool{accessible = false}; +#else + enum : bool { accessible = true }; +#endif enum : bool { deepcopy = true }; }; @@ -558,8 +575,12 @@ struct DeepCopy::assignable); +#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#else + static_assert(Kokkos::Impl::MemorySpaceAccess::accessible); +#endif static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY static_assert(!Kokkos::SpaceAccessibility::accessible); +#else + static_assert(Kokkos::SpaceAccessibility::accessible); +#endif static_assert(Kokkos::SpaceAccessibility::accessible); @@ -157,8 +167,14 @@ TEST(cuda, space_access) { Kokkos::SpaceAccessibility::accessible); +#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY static_assert(std::is_same::Space, Kokkos::HostSpace>::value); +#else + static_assert(std::is_same::Space, + Kokkos::Device>::value); +#endif static_assert( std::is_same::Space, From 28ccd1e72832a3bbe9393b7649c3798723bc328c Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Fri, 6 Sep 2024 15:04:57 -0600 Subject: [PATCH 7/8] [4.4.01] Add SIMD AVX2 mask assignment fix and update changelog for 4.4.01 (#7300) * [ci skip] Update changelog for 4.4.01 * Fix implicit copy assignment operator for AVX2 mask being deleted * Add SIMD fix to changelog * Fixup 4.4.1 change log [ci skip] --------- Co-authored-by: Dong Hun Lee Co-authored-by: Damien L-G --- CHANGELOG.md | 12 +- simd/src/Kokkos_SIMD_AVX2.hpp | 12 +- simd/unit_tests/TestSIMD.cpp | 1 + .../include/TestSIMD_Construction.hpp | 150 ++++++++++++++++++ 4 files changed, 163 insertions(+), 12 deletions(-) create mode 100644 simd/unit_tests/include/TestSIMD_Construction.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index db7accd5ed8..7b1d69e5663 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,17 @@ ## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.4.01) -### Feature: +### Features: * Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229) -### Bug Fix -* Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284) +### Backend and Architecture Enhancements: + +#### CUDA: +* Experimental support for unified memory mode (intended for Grace-Hopper etc.) [\#6823](https://github.com/kokkos/kokkos/pull/6823) + +### Bug Fixes +* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284) +* Fix implicit copy assignment operators in few AVX2 masks being deleted [#7296](https://github.com/kokkos/kokkos/pull/7296) ## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00) diff --git a/simd/src/Kokkos_SIMD_AVX2.hpp b/simd/src/Kokkos_SIMD_AVX2.hpp index 27c8af79abd..0525dc8887a 100644 --- a/simd/src/Kokkos_SIMD_AVX2.hpp +++ b/simd/src/Kokkos_SIMD_AVX2.hpp @@ -361,9 +361,7 @@ class simd_mask> { }; using value_type = bool; using abi_type = simd_abi::avx2_fixed_size<4>; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) : m_value(_mm_set1_epi32(-std::int32_t(value))) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { @@ -460,9 +458,7 @@ class simd_mask> { }; using value_type = bool; using abi_type = simd_abi::avx2_fixed_size<8>; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) : m_value(_mm256_set1_epi32(-std::int32_t(value))) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { @@ -561,9 +557,7 @@ class simd_mask> { }; using value_type = bool; using abi_type = simd_abi::avx2_fixed_size<4>; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) : m_value(_mm256_set1_epi64x(-std::int64_t(value))) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { diff --git a/simd/unit_tests/TestSIMD.cpp b/simd/unit_tests/TestSIMD.cpp index 7a1f9be2a0f..df18b43c4e3 100644 --- a/simd/unit_tests/TestSIMD.cpp +++ b/simd/unit_tests/TestSIMD.cpp @@ -22,3 +22,4 @@ #include #include #include +#include diff --git a/simd/unit_tests/include/TestSIMD_Construction.hpp b/simd/unit_tests/include/TestSIMD_Construction.hpp new file mode 100644 index 00000000000..0ceb1496c47 --- /dev/null +++ b/simd/unit_tests/include/TestSIMD_Construction.hpp @@ -0,0 +1,150 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_TEST_SIMD_CONSTRUCTION_HPP +#define KOKKOS_TEST_SIMD_CONSTRUCTION_HPP + +#include +#include + +template +inline void host_test_simd_traits() { + using simd_type = Kokkos::Experimental::simd; + + static_assert(std::is_nothrow_default_constructible_v); + static_assert(std::is_nothrow_copy_assignable_v); + static_assert(std::is_nothrow_copy_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_move_constructible_v); + + simd_type default_simd, result; + simd_type test_simd(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + simd_type copy_simd(test_simd); + simd_type move_simd(std::move(copy_simd)); + default_simd = std::move(move_simd); + result = default_simd; + EXPECT_TRUE(all_of(test_simd == result)); +} + +template +inline void host_test_mask_traits() { + using mask_type = Kokkos::Experimental::simd_mask; + + static_assert(std::is_nothrow_default_constructible_v); + static_assert(std::is_nothrow_copy_assignable_v); + static_assert(std::is_nothrow_copy_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_move_constructible_v); + + mask_type default_mask, result; + mask_type test_mask(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + mask_type copy_mask(test_mask); + mask_type move_mask(std::move(copy_mask)); + default_mask = std::move(move_mask); + result = default_mask; + EXPECT_EQ(test_mask, result); +} + +template +inline void host_check_construction() { + if constexpr (is_type_v>) { + host_test_simd_traits(); + host_test_mask_traits(); + } +} + +template +inline void host_check_construction_all_types( + Kokkos::Experimental::Impl::data_types) { + (host_check_construction(), ...); +} + +template +inline void host_check_construction_all_abis( + Kokkos::Experimental::Impl::abi_set) { + using DataTypes = Kokkos::Experimental::Impl::data_type_set; + (host_check_construction_all_types(DataTypes()), ...); +} + +template +KOKKOS_INLINE_FUNCTION void device_test_simd_traits() { + using simd_type = Kokkos::Experimental::simd; + + simd_type default_simd, result; + simd_type test_simd(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + simd_type copy_simd(test_simd); + simd_type move_simd(std::move(copy_simd)); + default_simd = std::move(move_simd); + result = default_simd; + + kokkos_checker checker; + checker.truth(all_of(test_simd == result)); +} + +template +KOKKOS_INLINE_FUNCTION void device_test_mask_traits() { + using mask_type = Kokkos::Experimental::simd_mask; + + mask_type default_mask, result; + mask_type test_mask(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + mask_type copy_mask(test_mask); + mask_type move_mask(std::move(copy_mask)); + default_mask = std::move(move_mask); + result = default_mask; + + kokkos_checker checker; + checker.truth(test_mask == result); +} + +template +KOKKOS_INLINE_FUNCTION void device_check_construction() { + if constexpr (is_type_v>) { + device_test_simd_traits(); + device_test_mask_traits(); + } +} + +template +KOKKOS_INLINE_FUNCTION void device_check_construction_all_types( + Kokkos::Experimental::Impl::data_types) { + (device_check_construction(), ...); +} + +template +KOKKOS_INLINE_FUNCTION void device_check_construction_all_abis( + Kokkos::Experimental::Impl::abi_set) { + using DataTypes = Kokkos::Experimental::Impl::data_type_set; + (device_check_construction_all_types(DataTypes()), ...); +} + +class simd_device_construction_functor { + public: + KOKKOS_INLINE_FUNCTION void operator()(int) const { + device_check_construction_all_abis( + Kokkos::Experimental::Impl::device_abi_set()); + } +}; + +TEST(simd, host_construction) { + host_check_construction_all_abis(Kokkos::Experimental::Impl::host_abi_set()); +} + +TEST(simd, device_construction) { + Kokkos::parallel_for(Kokkos::RangePolicy>(0, 1), + simd_device_construction_functor()); +} + +#endif From 5cb2fa30a39a73664b7508d0a514e8f8daa84359 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 12 Sep 2024 11:10:58 -0600 Subject: [PATCH 8/8] Update master_history.txt for 4.4.01 --- master_history.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/master_history.txt b/master_history.txt index a0e83bef237..f2a41636101 100644 --- a/master_history.txt +++ b/master_history.txt @@ -38,3 +38,4 @@ tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4 tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c +tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039