Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide portable Gpu::Atomic::Multiply and Gpu::Atomic::Divide implemented with CAS. #3724

Merged
merged 7 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Src/Base/AMReX_Functional.H
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,24 @@ struct LogicalOr
}
};

/// Binary function object that multiplies its two operands.
/// Evaluates `left * right` and returns the result as a T.
template <typename T>
struct Multiplies
{
    constexpr T operator() (T const& left, T const& right) const
    {
        return left * right;
    }
};

/// Binary function object that divides its first operand by its second.
/// Evaluates `numerator / denominator` and returns the result as a T;
/// no check is made on the denominator.
template <typename T>
struct Divides
{
    constexpr T operator() (T const& numerator, T const& denominator) const
    {
        return numerator / denominator;
    }
};

}

#endif
76 changes: 75 additions & 1 deletion Src/Base/AMReX_GpuAtomic.H
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ namespace amrex {
namespace Gpu::Atomic {

// For Add, Min and Max, we support int, unsigned int, long, unsigned long long, float and double.
// For Multiply and Divide, we support generic types provided they are the same size as int or unsigned long long
// and have *= and /= operators.
// For LogicalOr and LogicalAnd, the data type is int.
// For Exch and CAS, the data type is generic.
// All these functions are non-atomic in host code!!!
// If one needs them to be atomic in host code, use HostDevice::Atomic::*. Currently only
// HostDevice::Atomic::Add is supported. We could certainly add more.

namespace detail {

Expand Down Expand Up @@ -526,6 +528,78 @@ namespace detail {
))
#endif
}

////////////////////////////////////////////////////////////////////////
// Multiply
////////////////////////////////////////////////////////////////////////

#ifdef AMREX_USE_GPU

// Device-side multiply for types T with sizeof(T) == sizeof(int).
// Forwards to detail::atomic_op<T, int>, passing amrex::Multiplies<T> as the
// combining operation, and returns whatever atomic_op returns.
// NOTE(review): presumably that is the value stored at prod before the
// update (the host path of Multiply returns the old value) - confirm
// against detail::atomic_op.
template <typename T, std::enable_if_t<sizeof(T) == sizeof(int), int> = 0>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T Multiply_device (T* const prod, T const value) noexcept
{
    using Op = amrex::Multiplies<T>;
    return detail::atomic_op<T, int>(prod, value, Op{});
}

// Device-side multiply for types T with sizeof(T) == sizeof(unsigned long long).
// Forwards to detail::atomic_op<T, unsigned long long>, passing
// amrex::Multiplies<T> as the combining operation, and returns whatever
// atomic_op returns.
// NOTE(review): presumably that is the value stored at prod before the
// update (the host path of Multiply returns the old value) - confirm
// against detail::atomic_op.
template <typename T, std::enable_if_t<sizeof(T) == sizeof(unsigned long long), int> = 0>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T Multiply_device (T* const prod, T const value) noexcept
{
    using Op = amrex::Multiplies<T>;
    return detail::atomic_op<T, unsigned long long>(prod, value, Op{});
}

#endif

// Multiply the value at prod by value, in place.
//
// On device this dispatches to Multiply_device(prod, value).
// On host it is a plain (non-atomic) read, `*=`, and return of the value
// that was stored at prod before the multiplication.
template<class T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T Multiply (T* const prod, T const value) noexcept
{
    AMREX_IF_ON_DEVICE((
        return Multiply_device(prod, value);
    ))
    AMREX_IF_ON_HOST((
        auto const previous = *prod;
        *prod *= value;
        return previous;
    ))
}

////////////////////////////////////////////////////////////////////////
// Divide
////////////////////////////////////////////////////////////////////////

#ifdef AMREX_USE_GPU

// Device-side divide for types T with sizeof(T) == sizeof(int).
// Forwards to detail::atomic_op<T, int>, passing amrex::Divides<T> as the
// combining operation, and returns whatever atomic_op returns.
// NOTE(review): presumably that is the value stored at quot before the
// update (the host path of Divide returns the old value) - confirm
// against detail::atomic_op.
template <typename T, std::enable_if_t<sizeof(T) == sizeof(int), int> = 0>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T Divide_device (T* const quot, T const value) noexcept
{
    using Op = amrex::Divides<T>;
    return detail::atomic_op<T, int>(quot, value, Op{});
}

// Device-side divide for types T with sizeof(T) == sizeof(unsigned long long).
// Forwards to detail::atomic_op<T, unsigned long long>, passing
// amrex::Divides<T> as the combining operation, and returns whatever
// atomic_op returns.
// NOTE(review): presumably that is the value stored at quot before the
// update (the host path of Divide returns the old value) - confirm
// against detail::atomic_op.
template <typename T, std::enable_if_t<sizeof(T) == sizeof(unsigned long long), int> = 0>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T Divide_device (T* const quot, T const value) noexcept
{
    using Op = amrex::Divides<T>;
    return detail::atomic_op<T, unsigned long long>(quot, value, Op{});
}

#endif

// Divide the value at quot by value, in place.
//
// On device this dispatches to Divide_device(quot, value).
// On host it is a plain (non-atomic) read, `/=`, and return of the value
// that was stored at quot before the division.
template<class T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T Divide (T* const quot, T const value) noexcept
{
    AMREX_IF_ON_DEVICE((
        return Divide_device(quot, value);
    ))
    AMREX_IF_ON_HOST((
        auto const previous = *quot;
        *quot /= value;
        return previous;
    ))
}
}

namespace HostDevice::Atomic {
Expand Down
Loading