Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide portable Gpu::Atomic::Multiply and Gpu::Atomic::Divide implemented with CAS. #3724

Merged
merged 7 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Src/Base/AMReX_Functional.H
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,24 @@ struct LogicalOr
}
};

//! Binary function object yielding the product of its operands
//! (analogous to std::multiplies, usable in device code).
template <typename T>
struct Multiplies
{
    //! Returns a * b.
    constexpr T operator() (const T& a, const T& b) const { return a * b; }
};

//! Binary function object yielding the quotient of its operands
//! (analogous to std::divides, usable in device code).
template <typename T>
struct Divides
{
    //! Returns a / b.
    constexpr T operator() (const T& a, const T& b) const { return a / b; }
};

}

#endif
134 changes: 132 additions & 2 deletions Src/Base/AMReX_GpuAtomic.H
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ namespace amrex {

namespace Gpu::Atomic {

// For Add, Min and Max, we support int, unsigned int, long, unsigned long long, float and double.
// For Add, Multiply, Divide, Min and Max, we support int, unsigned int, Long, unsigned long long, float and double.
// For LogicalOr and LogicalAnd, the data type is int.
// For Exch and CAS, the data type is generic.
// All these functions are non-atomic in host code!!!
// If one needs them to be atomic in host code, use HostDevice::Atomic::*. Currently only
// HostDevice::Atomic is supported. We could certainly add more.
// HostDevice::Atomic::Add is supported. We could certainly add more.

namespace detail {

Expand Down Expand Up @@ -526,6 +526,136 @@ namespace detail {
))
#endif
}

////////////////////////////////////////////////////////////////////////
// Multiply
////////////////////////////////////////////////////////////////////////

#ifdef AMREX_USE_GPU

//! Fallback overload for types without a specialization below.
//! Calling it is an error: it aborts at run time with a diagnostic.
template<class T>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T Multiply_device (T* const prod, T const /*value*/) noexcept
{
    amrex::Abort("Gpu::Atomic::Multiply is not implemented for this data type.");
    // Unreachable if Abort does not return, but keeps this non-void function
    // from flowing off its end (avoids -Wreturn-type and UB if ever reached).
    return *prod;
}

//! float specialization: CAS loop over a 32-bit (int) word.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
float Multiply_device (float* const prod, float const value) noexcept
{
    return detail::atomic_op<float, int>(prod, value, amrex::Multiplies<float>{});
}

//! double specialization: CAS loop over a 64-bit (unsigned long long) word.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
double Multiply_device (double* const prod, double const value) noexcept
{
    return detail::atomic_op<double, unsigned long long int>(prod, value, amrex::Multiplies<double>{});
}

//! int specialization.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int Multiply_device (int* const prod, int const value) noexcept
{
    return detail::atomic_op<int, int>(prod, value, amrex::Multiplies<int>{});
}

//! unsigned int specialization (payload carried in a 32-bit int word).
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
unsigned int Multiply_device (unsigned int* const prod, unsigned int const value) noexcept
{
    return detail::atomic_op<unsigned int, int>(prod, value, amrex::Multiplies<unsigned int>{});
}

//! unsigned long long specialization.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
unsigned long long int Multiply_device (unsigned long long int* const prod, unsigned long long int const value) noexcept
{
    return detail::atomic_op<unsigned long long int, unsigned long long int>(prod, value, amrex::Multiplies<unsigned long long int>{});
}

//! Long specialization (payload carried in a 64-bit unsigned long long word).
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
Long Multiply_device (Long* const prod, Long const value) noexcept
{
    return detail::atomic_op<Long, unsigned long long int>(prod, value, amrex::Multiplies<Long>{});
}

#endif

//! Atomically (on the device) multiplies *prod by value and returns the
//! previous contents of *prod. NOTE: the host path below is a plain,
//! non-atomic read-modify-write (see the note at the top of this namespace).
template<class T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T Multiply (T* const prod, T const value) noexcept
{
    AMREX_IF_ON_DEVICE((
        return Multiply_device(prod, value);
    ))
    AMREX_IF_ON_HOST((
        T const previous = *prod;
        *prod = previous * value;
        return previous;
    ))
}

////////////////////////////////////////////////////////////////////////
// Divide
////////////////////////////////////////////////////////////////////////

#ifdef AMREX_USE_GPU

//! Fallback overload for types without a specialization below.
//! Calling it is an error: it aborts at run time with a diagnostic.
template<class T>
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
T Divide_device (T* const quot, T const /*value*/) noexcept
{
    amrex::Abort("Gpu::Atomic::Divide is not implemented for this data type.");
    // Unreachable if Abort does not return, but keeps this non-void function
    // from flowing off its end (avoids -Wreturn-type and UB if ever reached).
    return *quot;
}

//! float specialization: CAS loop over a 32-bit (int) word.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
float Divide_device (float* const quot, float const value) noexcept
{
    return detail::atomic_op<float, int>(quot, value, amrex::Divides<float>{});
}

//! double specialization: CAS loop over a 64-bit (unsigned long long) word.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
double Divide_device (double* const quot, double const value) noexcept
{
    return detail::atomic_op<double, unsigned long long int>(quot, value, amrex::Divides<double>{});
}

//! int specialization.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int Divide_device (int* const quot, int const value) noexcept
{
    return detail::atomic_op<int, int>(quot, value, amrex::Divides<int>{});
}

//! unsigned int specialization (payload carried in a 32-bit int word).
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
unsigned int Divide_device (unsigned int* const quot, unsigned int const value) noexcept
{
    return detail::atomic_op<unsigned int, int>(quot, value, amrex::Divides<unsigned int>{});
}

//! unsigned long long specialization.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
unsigned long long int Divide_device (unsigned long long int* const quot, unsigned long long int const value) noexcept
{
    return detail::atomic_op<unsigned long long int, unsigned long long int>(quot, value, amrex::Divides<unsigned long long int>{});
}

//! Long specialization (payload carried in a 64-bit unsigned long long word).
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
Long Divide_device (Long* const quot, Long const value) noexcept
{
    return detail::atomic_op<Long, unsigned long long int>(quot, value, amrex::Divides<Long>{});
}

#endif

//! Atomically (on the device) divides *quot by value and returns the
//! previous contents of *quot. NOTE: the host path below is a plain,
//! non-atomic read-modify-write (see the note at the top of this namespace).
template<class T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T Divide (T* const quot, T const value) noexcept
{
    AMREX_IF_ON_DEVICE((
        return Divide_device(quot, value);
    ))
    AMREX_IF_ON_HOST((
        T const previous = *quot;
        *quot = previous / value;
        return previous;
    ))
}
}

namespace HostDevice::Atomic {
Expand Down
Loading