From 5dee59924e16146669a30a6d00f94d5737f3f3aa Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 24 Jan 2024 11:08:14 -0800 Subject: [PATCH 1/7] Add implementation of atomic mult for GPU --- Src/Base/AMReX_Functional.H | 18 ++++++++++++++ Src/Base/AMReX_GpuAtomic.H | 49 ++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_Functional.H b/Src/Base/AMReX_Functional.H index bed6e7a4267..0098365db4a 100644 --- a/Src/Base/AMReX_Functional.H +++ b/Src/Base/AMReX_Functional.H @@ -63,6 +63,24 @@ struct LogicalOr } }; +template +struct Multiplies +{ + constexpr T operator() (const T & lhs, const T & rhs) const + { + return lhs * rhs; + } +}; + +template +struct Divides +{ + constexpr T operator() (const T & lhs, const T & rhs) const + { + return lhs / rhs; + } +}; + } #endif diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index deea6ae932e..b6f905a597c 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -17,7 +17,7 @@ namespace Gpu::Atomic { // For Exch and CAS, the data type is generic. // All these functions are non-atomic in host code!!! // If one needs them to be atomic in host code, use HostDevice::Atomic::*. Currently only -// HostDevice::Atomic is supported. We could certainly add more. +// HostDevice::Atomic::Add is supported. We could certainly add more. namespace detail { @@ -525,7 +525,54 @@ namespace detail { return old; )) #endif + +//////////////////////////////////////////////////////////////////////// +// Multiply +//////////////////////////////////////////////////////////////////////// + +#ifdef AMREX_USE_GPU + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T Multiply_device (T* const prod, T const value) noexcept + { + amrex::Abort("Gpu::Atomic::Multiply is not implemented for this data type."); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + float Multiply_device (float* const prod, float const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + double Multiply_device (double* const prod, double const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + Long Multiply_device (Long* const prod, Long const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); + } + +#endif + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T Multiply (T* const prod, T const value) noexcept + { + AMREX_IF_ON_DEVICE(( + return Multiply_device(prod, value); + )) + AMREX_IF_ON_HOST(( + auto const old = *prod; + *prod *= value; + return old; + )) + } + } namespace HostDevice::Atomic { From 1cb9b0810fe049e660b3148a7e00ee34550c34b0 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 24 Jan 2024 11:13:29 -0800 Subject: [PATCH 2/7] fix syntax error --- Src/Base/AMReX_GpuAtomic.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index b6f905a597c..0964d2c5da2 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -525,6 +525,7 @@ namespace detail { return old; )) #endif + } //////////////////////////////////////////////////////////////////////// // Multiply @@ -572,7 +573,6 @@ namespace detail { return old; )) } - } namespace HostDevice::Atomic { From 7a6157ffd159e90397d462a7a70ec8c2eda36774 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 24 Jan 2024 12:15:40 -0800 Subject: [PATCH 3/7] finish implementing Multiply and Divide --- Src/Base/AMReX_GpuAtomic.H | 85 +++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index 0964d2c5da2..0abc813a1a6 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -12,7 +12,7 @@ namespace amrex { namespace Gpu::Atomic { -// For Add, Min and Max, we support int, unsigned int, long, unsigned long long, float and double. +// For Add, Multiply, Divide, Min and Max, we support int, unsigned int, long, unsigned long long, float and double. // For LogicalOr and LogicalAnd, the data type is int. // For Exch and CAS, the data type is generic. // All these functions are non-atomic in host code!!! @@ -552,6 +552,24 @@ namespace detail { return detail::atomic_op(prod,value,amrex::Multiplies()); } + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + int Multiply_device (int* const prod, int const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + unsigned int Multiply_device (unsigned int* const prod, unsigned int const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + unsigned long long int Multiply_device (unsigned long long int* const prod, unsigned long long int const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); + } + AMREX_GPU_DEVICE AMREX_FORCE_INLINE Long Multiply_device (Long* const prod, Long const value) noexcept { @@ -573,6 +591,71 @@ namespace detail { return old; )) } + +//////////////////////////////////////////////////////////////////////// +// Divide +//////////////////////////////////////////////////////////////////////// + +#ifdef AMREX_USE_GPU + + template + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T Divide_device (T* const quot, T const value) noexcept + { + amrex::Abort("Gpu::Atomic::Divide is not implemented for this data type."); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + float Divide_device (float* const quot, float const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + double Divide_device (double* const quot, double const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + int Divide_device (int* const quot, int const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + unsigned int Divide_device (unsigned int* const quot, unsigned int const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + unsigned long long int Divide_device (unsigned long long int* const quot, unsigned long long int const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + Long Divide_device (Long* const quot, Long const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + +#endif + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T Divide (T* const quot, T const value) noexcept + { + AMREX_IF_ON_DEVICE(( + return Divide_device(quot, value); + )) + AMREX_IF_ON_HOST(( + auto const old = *quot; + *quot /= value; + return old; + )) + } } namespace HostDevice::Atomic { From 1a6b97060d72f832df2d805e828f079cab11bca0 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 24 Jan 2024 12:26:44 -0800 Subject: [PATCH 4/7] fix unused --- Src/Base/AMReX_GpuAtomic.H | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index 0abc813a1a6..021c73fc317 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -535,7 +535,7 @@ namespace detail { template AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T Multiply_device (T* const prod, T const value) noexcept + T Multiply_device (T* const /*prod*/, T const /*value*/) noexcept { amrex::Abort("Gpu::Atomic::Multiply is not implemented for this data type."); } @@ -600,7 +600,7 @@ namespace detail { template AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T Divide_device (T* const quot, T const value) noexcept + T Divide_device (T* const /*quot*/, T const /*value*/) noexcept { amrex::Abort("Gpu::Atomic::Divide is not implemented for this data type."); } From 9ee18766d50ea23bcadc0ad5ba9f008ddc2e7e34 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 25 Jan 2024 11:24:22 -0800 Subject: [PATCH 5/7] provide generic implementation --- Src/Base/AMReX_GpuAtomic.H | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index 021c73fc317..4f46b9d2450 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -13,6 +13,8 @@ namespace amrex { namespace Gpu::Atomic { // For Add, Multiply, Divide, Min and Max, we support int, unsigned int, long, unsigned long long, float and double. +// For Multiply and Divide, we also support user-defined types provided are the same size as int or unsigned long long +// and have user-defined *= and /= operators. // For LogicalOr and LogicalAnd, the data type is int. // For Exch and CAS, the data type is generic. // All these functions are non-atomic in host code!!! @@ -533,11 +535,18 @@ namespace detail { #ifdef AMREX_USE_GPU - template + template = 0> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T Multiply_device (T* const prod, T const value) noexcept + { + return detail::atomic_op(prod,value,amrex::Multiplies()); + } + + template = 0> AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T Multiply_device (T* const /*prod*/, T const /*value*/) noexcept + T Multiply_device (T* const prod, T const value) noexcept { - amrex::Abort("Gpu::Atomic::Multiply is not implemented for this data type."); + return detail::atomic_op(prod,value,amrex::Multiplies()); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE @@ -598,11 +607,18 @@ namespace detail { #ifdef AMREX_USE_GPU - template + template = 0> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T Divide_device (T* const quot, T const value) noexcept + { + return detail::atomic_op(quot,value,amrex::Divides()); + } + + template = 0> AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T Divide_device (T* const /*quot*/, T const /*value*/) noexcept + T Divides_device (T* const quot, T const value) noexcept { - amrex::Abort("Gpu::Atomic::Divide is not implemented for this data type."); + return detail::atomic_op(quot,value,amrex::Divides()); } AMREX_GPU_DEVICE AMREX_FORCE_INLINE From a76e94e8a05a169789b7b2f201ff4525c3a1adb6 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 25 Jan 2024 11:39:45 -0800 Subject: [PATCH 6/7] remove specializations --- Src/Base/AMReX_GpuAtomic.H | 78 ++------------------------------------ 1 file changed, 3 insertions(+), 75 deletions(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index 4f46b9d2450..c51a5a2ea55 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -12,9 +12,9 @@ namespace amrex { namespace Gpu::Atomic { -// For Add, Multiply, Divide, Min and Max, we support int, unsigned int, long, unsigned long long, float and double. -// For Multiply and Divide, we also support user-defined types provided are the same size as int or unsigned long long -// and have user-defined *= and /= operators. +// For Add, Min and Max, we support int, unsigned int, long, unsigned long long, float and double. +// For Multiply and Divide, we support generic types provided they are the same size as int or unsigned long long +// and have *= and /= operators. // For LogicalOr and LogicalAnd, the data type is int. // For Exch and CAS, the data type is generic. // All these functions are non-atomic in host code!!! @@ -549,42 +549,6 @@ namespace detail { return detail::atomic_op(prod,value,amrex::Multiplies()); } - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - float Multiply_device (float* const prod, float const value) noexcept - { - return detail::atomic_op(prod,value,amrex::Multiplies()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - double Multiply_device (double* const prod, double const value) noexcept - { - return detail::atomic_op(prod,value,amrex::Multiplies()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - int Multiply_device (int* const prod, int const value) noexcept - { - return detail::atomic_op(prod,value,amrex::Multiplies()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - unsigned int Multiply_device (unsigned int* const prod, unsigned int const value) noexcept - { - return detail::atomic_op(prod,value,amrex::Multiplies()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - unsigned long long int Multiply_device (unsigned long long int* const prod, unsigned long long int const value) noexcept - { - return detail::atomic_op(prod,value,amrex::Multiplies()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - Long Multiply_device (Long* const prod, Long const value) noexcept - { - return detail::atomic_op(prod,value,amrex::Multiplies()); - } - #endif template @@ -621,42 +585,6 @@ namespace detail { return detail::atomic_op(quot,value,amrex::Divides()); } - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - float Divide_device (float* const quot, float const value) noexcept - { - return detail::atomic_op(quot,value,amrex::Divides()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - double Divide_device (double* const quot, double const value) noexcept - { - return detail::atomic_op(quot,value,amrex::Divides()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - int Divide_device (int* const quot, int const value) noexcept - { - return detail::atomic_op(quot,value,amrex::Divides()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - unsigned int Divide_device (unsigned int* const quot, unsigned int const value) noexcept - { - return detail::atomic_op(quot,value,amrex::Divides()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - unsigned long long int Divide_device (unsigned long long int* const quot, unsigned long long int const value) noexcept - { - return detail::atomic_op(quot,value,amrex::Divides()); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - Long Divide_device (Long* const quot, Long const value) noexcept - { - return detail::atomic_op(quot,value,amrex::Divides()); - } - #endif template From 58e9394bbe10a73e841cc4ec0fed8e65726d2c57 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 25 Jan 2024 11:53:52 -0800 Subject: [PATCH 7/7] Update Src/Base/AMReX_GpuAtomic.H Co-authored-by: Weiqun Zhang --- Src/Base/AMReX_GpuAtomic.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index c51a5a2ea55..9adc655298e 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -580,7 +580,7 @@ namespace detail { template = 0> AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T Divides_device (T* const quot, T const value) noexcept + T Divide_device (T* const quot, T const value) noexcept { return detail::atomic_op(quot,value,amrex::Divides()); }