From 73a7a41ba81e3b7d15c387571264d96c13cdfc43 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 1 May 2024 15:14:06 -0600 Subject: [PATCH 1/9] update to version 4.3.01 - included: changelog fixup from develop --- CHANGELOG.md | 2 +- CMakeLists.txt | 2 +- Makefile.kokkos | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7b8af7695c..c70ee5505f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ * Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772) #### Threads: -* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6446) +* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601) #### OpenMP: * Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93a796f200b..76f2183db8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 3) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/Makefile.kokkos b/Makefile.kokkos index 2c74dd77bfb..f78a7b9adcb 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 3 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial From 775023262c88d401b39f95f33c34de47af037202 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 1 May 2024 15:15:27 -0600 Subject: [PATCH 2/9] changelog: header for version 4.3.01 --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c70ee5505f8..a412995da0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # CHANGELOG +## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) + +### Bug Fixes + ## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00) From 30979fb936dbebf69fc7c5a82d5fa88a86318515 Mon Sep 17 00:00:00 2001 From: Francesco Rizzi Date: Wed, 10 Apr 2024 23:19:08 +0200 Subject: [PATCH 3/9] cuda: reduction with `RangePolicy`: fix grid dimensions to work for large values and avoid overflow (#6578) Fixes issue #6578 and adds a test based on the bug report --- core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp | 5 ++-- core/unit_test/TestReduce.hpp | 26 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp b/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp index 0f052be3c30..334834938a1 100644 --- a/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp @@ -312,8 +312,9 @@ class ParallelReduce, // REQUIRED ( 1 , N , 1 ) dim3 block(1, block_size, 1); // Required grid.x <= block.y - dim3 grid(std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1, - 1); + dim3 grid(std::min(index_type(block.y), + index_type((nwork + block.y - 1) / block.y)), + 1, 1); // TODO @graph We need to effectively insert this in to the graph const int shmem = diff --git a/core/unit_test/TestReduce.hpp b/core/unit_test/TestReduce.hpp index e1aa851f102..61b2bfb1505 100644 --- a/core/unit_test/TestReduce.hpp +++ b/core/unit_test/TestReduce.hpp @@ -625,4 +625,30 @@ TEST(TEST_CATEGORY, int_combined_reduce_mixed) { } #endif #endif + +#if defined(NDEBUG) +// the following test was made for: +// https://github.com/kokkos/kokkos/issues/6517 + +struct FunctorReductionWithLargeIterationCount { + KOKKOS_FUNCTION void operator()(const int64_t /*i*/, double& update) const { + update += 1.0; + } +}; + +TEST(TEST_CATEGORY, reduction_with_large_iteration_count) { + if constexpr (std::is_same_v) { + GTEST_SKIP() << "Disabling for host backends"; + } + + const int64_t N = pow(2LL, 39LL) - pow(2LL, 8LL) + 1; + Kokkos::RangePolicy> p(0, N); + double nu = 0; + EXPECT_NO_THROW(Kokkos::parallel_reduce( + "sample reduction", p, FunctorReductionWithLargeIterationCount(), nu)); + ASSERT_DOUBLE_EQ(nu, double(N)); +} +#endif + } // namespace Test From 4d7258c26ef1e2420645e863134f05692a292631 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Wed, 10 Apr 2024 14:20:00 -0600 Subject: [PATCH 4/9] MI300 support unified memory support (#6877) * Make Host backends be included before device backends There was an inclusion order issue when trying to enable unified memory, which was due to incomplete types. Effectively the Host backends must be defined before the device memory spaces for them to be marked accessible. We may have the same issue if we try to mark HostSpace accessible by device memory spaces - but I am not sure. * Add unified memory arch support for AMD This is intended for MI300A, but for now you have to set -DKokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY=ON This marks HIPSpace as host accessible, and thus will make create_mirror_view a no-op. * Fix two tests for AMD unified memory archs * Address review comments --- Makefile.kokkos | 32 ++++++++-------- .../src/sorting/impl/Kokkos_SortImpl.hpp | 5 +++ cmake/KokkosCore_config.h.in | 1 + cmake/kokkos_enable_devices.cmake | 37 ++++++++++--------- cmake/kokkos_enable_options.cmake | 1 + core/src/HIP/Kokkos_HIP.cpp | 4 ++ .../HIP/Kokkos_HIP_SharedAllocationRecord.cpp | 4 ++ .../HIP/Kokkos_HIP_SharedAllocationRecord.hpp | 4 ++ core/src/HIP/Kokkos_HIP_Space.hpp | 19 +++++++++- core/unit_test/hip/TestHIP_Spaces.cpp | 16 ++++++++ 10 files changed, 89 insertions(+), 34 deletions(-) diff --git a/Makefile.kokkos b/Makefile.kokkos index f78a7b9adcb..d9be7901a38 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -1222,6 +1222,22 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp") + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") @@ -1241,26 +1257,10 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) diff --git a/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index 4c174b5fda9..08946228919 100644 --- a/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -399,9 +399,14 @@ sort_device_view_with_comparator( using ViewType = Kokkos::View; using MemSpace = typename ViewType::memory_space; +// Note with HIP unified memory this code path is still the right thing to do +// if we end up here when RocThrust is not enabled. +// The create_mirror_view_and_copy will do the right thing (no copy). +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY static_assert(!SpaceAccessibility::accessible, "Impl::sort_device_view_with_comparator: should not be called " "on a view that is already accessible on the host"); +#endif copy_to_host_run_stdsort_copy_back(exec, view, comparator); } diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 2df0f6c5205..3ab39cd6abf 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -39,6 +39,7 @@ #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS +#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY #cmakedefine KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH #cmakedefine KOKKOS_ENABLE_DEBUG #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK diff --git a/cmake/kokkos_enable_devices.cmake b/cmake/kokkos_enable_devices.cmake index 9a977520a3a..c7d189285c5 100644 --- a/cmake/kokkos_enable_devices.cmake +++ b/cmake/kokkos_enable_devices.cmake @@ -40,6 +40,26 @@ ELSE() ENDIF() KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend") + +# We want this to default to OFF for cache reasons, but if no +# host space is given, then activate serial +IF (KOKKOS_HAS_TRILINOS) + #However, Trilinos always wants Serial ON + SET(SERIAL_DEFAULT ON) +ELSEIF (KOKKOS_HAS_HOST) + SET(SERIAL_DEFAULT OFF) +ELSE() + SET(SERIAL_DEFAULT ON) + IF (NOT DEFINED Kokkos_ENABLE_SERIAL) + MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") + ENDIF() +ENDIF() +KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") + +KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") + +# Device backends have to come after host backends for header include order reasons +# Without this we can't make e.g. CudaSpace accessible by HostSpace KOKKOS_DEVICE_OPTION(OPENACC OFF DEVICE "Whether to build the OpenACC backend") IF (KOKKOS_ENABLE_OPENACC) COMPILER_SPECIFIC_FLAGS( @@ -90,23 +110,6 @@ IF (KOKKOS_ENABLE_CUDA) LIST(APPEND DEVICE_SETUP_LIST Cuda) ENDIF() -# We want this to default to OFF for cache reasons, but if no -# host space is given, then activate serial -IF (KOKKOS_HAS_TRILINOS) - #However, Trilinos always wants Serial ON - SET(SERIAL_DEFAULT ON) -ELSEIF (KOKKOS_HAS_HOST) - SET(SERIAL_DEFAULT OFF) -ELSE() - SET(SERIAL_DEFAULT ON) - IF (NOT DEFINED Kokkos_ENABLE_SERIAL) - MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") - ENDIF() -ENDIF() -KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") - -KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") - KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend") ## HIP has extra setup requirements, turn on Kokkos_Setup_HIP.hpp in macros diff --git a/cmake/kokkos_enable_options.cmake b/cmake/kokkos_enable_options.cmake index a437f6132aa..32788e7aa0f 100644 --- a/cmake/kokkos_enable_options.cmake +++ b/cmake/kokkos_enable_options.cmake @@ -70,6 +70,7 @@ KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tu KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops") KOKKOS_ENABLE_OPTION(COMPILE_AS_CMAKE_LANGUAGE OFF "Whether to use native cmake language support") KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time") +KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for HIP") # This option will go away eventually, but allows fallback to old implementation when needed. KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") diff --git a/core/src/HIP/Kokkos_HIP.cpp b/core/src/HIP/Kokkos_HIP.cpp index 309e07fb3fb..aced2083ffb 100644 --- a/core/src/HIP/Kokkos_HIP.cpp +++ b/core/src/HIP/Kokkos_HIP.cpp @@ -146,6 +146,10 @@ void HIP::print_configuration(std::ostream& os, bool /*verbose*/) const { #else os << "no\n"; #endif +#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + os << " KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY: "; + os << "yes\n"; +#endif os << "\nRuntime Configuration:\n"; diff --git a/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp b/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp index ab24004f5fc..83f829fddae 100644 --- a/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp +++ b/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp @@ -23,8 +23,12 @@ #include #include +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::HIPSpace); +#else +KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::HIPSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::HIPHostPinnedSpace); KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( diff --git a/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp b/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp index fbae5188344..1ca7bd5cd0e 100644 --- a/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp +++ b/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp @@ -20,8 +20,12 @@ #include #include +#if defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPSpace); +#else KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION( Kokkos::HIPSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPHostPinnedSpace); KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPManagedSpace); diff --git a/core/src/HIP/Kokkos_HIP_Space.hpp b/core/src/HIP/Kokkos_HIP_Space.hpp index 7f2004e5cbc..e1b4768b877 100644 --- a/core/src/HIP/Kokkos_HIP_Space.hpp +++ b/core/src/HIP/Kokkos_HIP_Space.hpp @@ -65,6 +65,18 @@ class HIPSpace { ~HIPSpace() = default; /**\brief Allocate untracked memory in the hip space */ +#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + template + void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const { + return allocate(arg_alloc_size); + } + template + void* allocate(const ExecutionSpace&, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const { + return allocate(arg_label, arg_alloc_size, arg_logical_size); + } +#else // FIXME_HIP Use execution space instance void* allocate(const HIP&, const size_t arg_alloc_size) const { return allocate(arg_alloc_size); @@ -74,6 +86,7 @@ class HIPSpace { const size_t arg_logical_size = 0) const { return allocate(arg_label, arg_alloc_size, arg_logical_size); } +#endif void* allocate(const size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -267,7 +280,11 @@ static_assert(Kokkos::Impl::MemorySpaceAccess::assignable); template <> struct MemorySpaceAccess { enum : bool { assignable = false }; - enum : bool { accessible = false }; +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) + enum : bool{accessible = false}; +#else + enum : bool { accessible = true }; +#endif enum : bool { deepcopy = true }; }; diff --git a/core/unit_test/hip/TestHIP_Spaces.cpp b/core/unit_test/hip/TestHIP_Spaces.cpp index 8f7499c244b..673c0f0fff3 100644 --- a/core/unit_test/hip/TestHIP_Spaces.cpp +++ b/core/unit_test/hip/TestHIP_Spaces.cpp @@ -38,8 +38,13 @@ TEST(hip, space_access) { static_assert(!Kokkos::Impl::MemorySpaceAccess::assignable); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(!Kokkos::Impl::MemorySpaceAccess::accessible); +#else + static_assert(Kokkos::Impl::MemorySpaceAccess::accessible); +#endif static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(!Kokkos::SpaceAccessibility::accessible); +#else + static_assert(Kokkos::SpaceAccessibility::accessible); +#endif static_assert( Kokkos::SpaceAccessibility::accessible); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(std::is_same::Space, Kokkos::HostSpace>::value); +#else + static_assert(std::is_same::Space, + Kokkos::Device>::value); +#endif static_assert( std::is_same::Space, From fbab8bdf005e1eaab129ed0ba00ec9512eafd878 Mon Sep 17 00:00:00 2001 From: Geoffroy Lesur Date: Fri, 12 Apr 2024 20:56:29 +0200 Subject: [PATCH 5/9] bring back --fmad option to nvcc_wrapper (#6931) * bring back --fmad option to nvcc_wrapper * Preserve support for flag with single leading dash --------- Co-authored-by: Damien L-G --- bin/nvcc_wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/nvcc_wrapper b/bin/nvcc_wrapper index 9b935835d5f..dbfef2267fe 100755 --- a/bin/nvcc_wrapper +++ b/bin/nvcc_wrapper @@ -229,7 +229,7 @@ do fi ;; #Handle known nvcc args - --dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) + --dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|--fmad=*|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args From cf7f87c19cfca655af35484e64af57694e98dc43 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 18 Apr 2024 19:29:23 -0600 Subject: [PATCH 6/9] Merge pull request #6951 from masterleinad/fix_serial_space_team_policy Serial: Use the provided execution space instance in TeamPolicy (cherry picked from commit cc602957cec5627c8752137c0412caa6fe37d2bc) --- core/src/Serial/Kokkos_Serial_Parallel_Team.hpp | 12 ++++++------ core/unit_test/TestExecSpacePartitioning.hpp | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp b/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp index f34a7daaca0..a25b51496ef 100644 --- a/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp +++ b/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp @@ -37,6 +37,8 @@ class TeamPolicyInternal int m_league_size; int m_chunk_size; + Kokkos::Serial m_space; + public: //! Tag this class as a kokkos execution policy using execution_policy = TeamPolicyInternal; @@ -46,10 +48,7 @@ class TeamPolicyInternal //! Execution space of this execution policy: using execution_space = Kokkos::Serial; - const typename traits::execution_space& space() const { - static typename traits::execution_space m_space; - return m_space; - } + const typename traits::execution_space& space() const { return m_space; } template friend class TeamPolicyInternal; @@ -116,12 +115,13 @@ class TeamPolicyInternal return (level == 0 ? 1024 * 32 : 20 * 1024 * 1024); } /** \brief Specify league size, request team size */ - TeamPolicyInternal(const execution_space&, int league_size_request, + TeamPolicyInternal(const execution_space& space, int league_size_request, int team_size_request, int /* vector_length_request */ = 1) : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_league_size(league_size_request), - m_chunk_size(32) { + m_chunk_size(32), + m_space(space) { if (team_size_request > 1) Kokkos::abort("Kokkos::abort: Requested Team Size is too large!"); } diff --git a/core/unit_test/TestExecSpacePartitioning.hpp b/core/unit_test/TestExecSpacePartitioning.hpp index 65314d6be7c..f8b570ab64d 100644 --- a/core/unit_test/TestExecSpacePartitioning.hpp +++ b/core/unit_test/TestExecSpacePartitioning.hpp @@ -28,6 +28,17 @@ struct SumFunctor { void operator()(int i, int& lsum) const { lsum += i; } }; +template +void check_space_member_for_policies(const ExecSpace& exec) { + Kokkos::RangePolicy range_policy(exec, 0, 1); + ASSERT_EQ(range_policy.space(), exec); + Kokkos::MDRangePolicy> mdrange_policy(exec, {0, 0}, + {1, 1}); + ASSERT_EQ(mdrange_policy.space(), exec); + Kokkos::TeamPolicy team_policy(exec, 1, Kokkos::AUTO); + ASSERT_EQ(team_policy.space(), exec); +} + template void check_distinctive([[maybe_unused]] ExecSpace exec1, [[maybe_unused]] ExecSpace exec2) { @@ -89,6 +100,9 @@ void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { void test_partitioning(std::vector& instances) { check_distinctive(instances[0], instances[1]); + check_space_member_for_policies(instances[0]); + check_space_member_for_policies(instances[1]); + int sum1, sum2; int N = 3910; run_threaded_test( From 9fccb610750d7127b821f15caccfe1f468c91299 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 1 May 2024 16:39:17 -0600 Subject: [PATCH 7/9] Update changelog for 4.3.01 --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a412995da0f..4fbc9002973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,15 @@ ## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) +### Backend and Architecture Enhancements: + +#### HIP: +* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877) + ### Bug Fixes +* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) +* `nvcc_wrapper`: bring back support for `--fmad` option [\#6931](https://github.com/kokkos/kokkos/pull/6931) +* Fix CUDA reduction overflow for `RangePolicy` [\#6578](https://github.com/kokkos/kokkos/pull/6578) ## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00) From 83498bdc6027ad64096ade21a4be7776b81c7715 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Thu, 2 May 2024 09:22:10 -0600 Subject: [PATCH 8/9] Fix Copyright file --- Copyright.txt | 49 ++++++++----------------------------------------- LICENSE | 10 ---------- 2 files changed, 8 insertions(+), 51 deletions(-) diff --git a/Copyright.txt b/Copyright.txt index 5e2f8d8647b..cbba3efc7bc 100644 --- a/Copyright.txt +++ b/Copyright.txt @@ -1,41 +1,8 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER +************************************************************************ + + Kokkos v. 4.0 + Copyright (2022) National Technology & Engineering + Solutions of Sandia, LLC (NTESS). + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. diff --git a/LICENSE b/LICENSE index 6572cc2db05..4d9d69d7c44 100644 --- a/LICENSE +++ b/LICENSE @@ -1,13 +1,3 @@ - ************************************************************************ - - Kokkos v. 4.0 - Copyright (2022) National Technology & Engineering - Solutions of Sandia, LLC (NTESS). - - Under the terms of Contract DE-NA0003525 with NTESS, - the U.S. Government retains certain rights in this software. - - ============================================================================== Kokkos is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== From c80cdafef30d02b00577c1dbbae2e925314f45a5 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 7 May 2024 11:15:43 -0600 Subject: [PATCH 9/9] update master_history.txt --- master_history.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/master_history.txt b/master_history.txt index bd122a456bd..31be9253254 100644 --- a/master_history.txt +++ b/master_history.txt @@ -36,3 +36,4 @@ tag: 4.1.00 date: 06:20:2023 master: 62d2b6c8 release: adde1e6a tag: 4.2.00 date: 11:09:2023 master: 1a3ea28f release: abe01c88 tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4 +tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e