From f2c1084c682dc1c0826cccf946bb683a541872c2 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 8 Jun 2024 20:31:11 -0400 Subject: [PATCH 1/6] initial version --- Src/Base/AMReX_GpuControl.H | 4 +++ Src/Base/AMReX_GpuDevice.cpp | 6 +++- Src/Base/AMReX_PArena.H | 2 +- Src/Base/AMReX_PArena.cpp | 54 ++++++++++++++++++++++++------------ 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/Src/Base/AMReX_GpuControl.H b/Src/Base/AMReX_GpuControl.H index 1532045bec4..4cc8abffdca 100644 --- a/Src/Base/AMReX_GpuControl.H +++ b/Src/Base/AMReX_GpuControl.H @@ -11,6 +11,10 @@ #define AMREX_CUDA_GE_11_2 1 #endif +#if defined(AMREX_USE_HIP) || defined(AMREX_CUDA_GE_11_2) +#define AMREX_GPU_STREAM_ALLOC_SUPPORT 1 +#endif + #if defined(AMREX_USE_HIP) #define AMREX_HIP_OR_CUDA(a,b) a #elif defined(AMREX_USE_CUDA) diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index 7fa0d2011fb..d9bebfe4fcd 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -386,13 +386,17 @@ Device::initialize_gpu () AMREX_HIP_SAFE_CALL(hipStreamCreate(&gpu_stream_pool[i])); } +#ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT + hipDeviceGetAttribute(&memory_pools_supported, hipDevAttrMemoryPoolsSupported, device_id); +#endif + #elif defined(AMREX_USE_CUDA) AMREX_CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_prop, device_id)); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(device_prop.major >= 4 || (device_prop.major == 3 && device_prop.minor >= 5), "Compute capability must be >= 3.5"); -#ifdef AMREX_CUDA_GE_11_2 +#ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT cudaDeviceGetAttribute(&memory_pools_supported, cudaDevAttrMemoryPoolsSupported, device_id); #endif diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H index 75db747fd9f..58d4c1d79c3 100644 --- a/Src/Base/AMReX_PArena.H +++ b/Src/Base/AMReX_PArena.H @@ -42,7 +42,7 @@ public: [[nodiscard]] bool isStreamOrderedArena () const final { return true; } #endif -#ifdef AMREX_CUDA_GE_11_2 +#ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT private: cudaMemPool_t m_pool; cuuint64_t m_old_release_threshold; diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index 82781a7be65..3329828f42d 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -11,13 +11,23 @@ namespace amrex { PArena::PArena (Long release_threshold) { -#ifdef AMREX_CUDA_GE_11_2 +#ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT if (Gpu::Device::memoryPoolsSupported()) { - AMREX_CUDA_SAFE_CALL(cudaDeviceGetMemPool(&m_pool, Gpu::Device::deviceId())); - AMREX_CUDA_SAFE_CALL(cudaMemPoolGetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, - &m_old_release_threshold)); + AMREX_HIP_OR_CUDA( + AMREX_HIP_SAFE_CALL(hipDeviceGetMemPool(&m_pool, Gpu::Device::deviceId())); + AMREX_CUDA_SAFE_CALL(cudaDeviceGetMemPool(&m_pool, Gpu::Device::deviceId())); + ) + AMREX_HIP_OR_CUDA( + AMREX_HIP_SAFE_CALL(hipMemPoolGetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, + &m_old_release_threshold)); + AMREX_CUDA_SAFE_CALL(cudaMemPoolGetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, + &m_old_release_threshold)); + ) cuuint64_t value = release_threshold; - AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &value)); + AMREX_HIP_OR_CUDA( + AMREX_HIP_SAFE_CALL(hipMemPoolSetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, &value)); + AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &value)); + ) } #endif amrex::ignore_unused(release_threshold); @@ -25,10 +35,14 @@ PArena::PArena (Long release_threshold) PArena::~PArena () // NOLINT(modernize-use-equals-default) { -#ifdef AMREX_CUDA_GE_11_2 +#ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT if (Gpu::Device::memoryPoolsSupported()) { - AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, - &m_old_release_threshold)); + AMREX_HIP_OR_CUDA( + AMREX_HIP_SAFE_CALL(hipMemPoolSetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, + &m_old_release_threshold)); + AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, + &m_old_release_threshold)); + ) } #endif } @@ -38,10 +52,13 @@ PArena::alloc (std::size_t nbytes) { #if defined(AMREX_USE_GPU) -#ifdef AMREX_CUDA_GE_11_2 +#if defined(AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { void* p; - AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); + AMREX_HIP_OR_CUDA( + AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); + AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); + ) return p; } else #endif @@ -71,9 +88,12 @@ PArena::free (void* p) #if defined(AMREX_USE_GPU) -#ifdef AMREX_CUDA_GE_11_2 +#if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { - AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream())); + AMREX_HIP_OR_CUDA( + AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream())); + AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream())); + ) } else #endif { @@ -99,7 +119,7 @@ bool PArena::isDeviceAccessible () const { #ifdef AMREX_USE_GPU -#ifdef AMREX_CUDA_GE_11_2 +#ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT if (Gpu::Device::memoryPoolsSupported()) { return true; } else @@ -116,7 +136,7 @@ bool PArena::isHostAccessible () const { #ifdef AMREX_USE_GPU -#ifdef AMREX_CUDA_GE_11_2 +#if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { return false; // cudaMallocAsync allocates device memory } else @@ -133,7 +153,7 @@ bool PArena::isManaged () const { #ifdef AMREX_USE_GPU -#ifdef AMREX_CUDA_GE_11_2 +#if defined(AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { return false; // cudaMallocAsync allocates device memory } else @@ -150,7 +170,7 @@ bool PArena::isDevice () const { #ifdef AMREX_USE_GPU -#ifdef AMREX_CUDA_GE_11_2 +#if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { return true; // cudaMallocAsync allocates device memory } else @@ -167,7 +187,7 @@ bool PArena::isPinned () const { #ifdef AMREX_USE_GPU -#ifdef AMREX_CUDA_GE_11_2 +#if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { return false; // cudaMallocAsync allocates device memory } else From 2a0f68cfe96d384c7a6b1dae7a32f9eb943201e4 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 8 Jun 2024 20:41:18 -0400 Subject: [PATCH 2/6] fix macro usage --- Src/Base/AMReX_PArena.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index 3329828f42d..1f8f395ee9b 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -14,18 +14,18 @@ PArena::PArena (Long release_threshold) #ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT if (Gpu::Device::memoryPoolsSupported()) { AMREX_HIP_OR_CUDA( - AMREX_HIP_SAFE_CALL(hipDeviceGetMemPool(&m_pool, Gpu::Device::deviceId())); + AMREX_HIP_SAFE_CALL(hipDeviceGetMemPool(&m_pool, Gpu::Device::deviceId()));, AMREX_CUDA_SAFE_CALL(cudaDeviceGetMemPool(&m_pool, Gpu::Device::deviceId())); ) AMREX_HIP_OR_CUDA( AMREX_HIP_SAFE_CALL(hipMemPoolGetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, - &m_old_release_threshold)); + &m_old_release_threshold));, AMREX_CUDA_SAFE_CALL(cudaMemPoolGetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &m_old_release_threshold)); ) cuuint64_t value = release_threshold; AMREX_HIP_OR_CUDA( - AMREX_HIP_SAFE_CALL(hipMemPoolSetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, &value)); + AMREX_HIP_SAFE_CALL(hipMemPoolSetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, &value));, AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &value)); ) } @@ -39,7 +39,7 @@ PArena::~PArena () // NOLINT(modernize-use-equals-default) if (Gpu::Device::memoryPoolsSupported()) { AMREX_HIP_OR_CUDA( AMREX_HIP_SAFE_CALL(hipMemPoolSetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, - &m_old_release_threshold)); + &m_old_release_threshold));, AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &m_old_release_threshold)); ) @@ -56,7 +56,7 @@ PArena::alloc (std::size_t nbytes) if (Gpu::Device::memoryPoolsSupported()) { void* p; AMREX_HIP_OR_CUDA( - AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); + AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); ) return p; @@ -91,7 +91,7 @@ PArena::free (void* p) #if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { AMREX_HIP_OR_CUDA( - AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream())); + AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream())); ) } else From 34e69f383687189660e8c37dc7ebc74d24d69d14 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 8 Jun 2024 20:54:45 -0400 Subject: [PATCH 3/6] use uint64_t for HIP --- Src/Base/AMReX_PArena.H | 10 ++++++++-- Src/Base/AMReX_PArena.cpp | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H index 58d4c1d79c3..74acb81ac8e 100644 --- a/Src/Base/AMReX_PArena.H +++ b/Src/Base/AMReX_PArena.H @@ -44,8 +44,14 @@ public: #ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT private: - cudaMemPool_t m_pool; - cuuint64_t m_old_release_threshold; + AMREX_HIP_OR_CUDA( + hipMemPool_t m_pool;, + cudaMemPool_t m_pool; + ) + AMREX_HIP_OR_CUDA( + uint64_t m_old_release_threshold; + cuuint64_t m_old_release_threshold; + ) #endif }; diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index 1f8f395ee9b..36155f3d32c 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -23,7 +23,10 @@ PArena::PArena (Long release_threshold) AMREX_CUDA_SAFE_CALL(cudaMemPoolGetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &m_old_release_threshold)); ) - cuuint64_t value = release_threshold; + AMREX_HIP_OR_CUDA( + uint64_t value = release_threshold;, + cuuint64_t value = release_threshold; + ) AMREX_HIP_OR_CUDA( AMREX_HIP_SAFE_CALL(hipMemPoolSetAttribute(m_pool, hipMemPoolAttrReleaseThreshold, &value));, AMREX_CUDA_SAFE_CALL(cudaMemPoolSetAttribute(m_pool, cudaMemPoolAttrReleaseThreshold, &value)); From a258a09614bce9b24a9053ed2bb9f0ff0dae7b5c Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 8 Jun 2024 20:57:57 -0400 Subject: [PATCH 4/6] fix typo --- Src/Base/AMReX_PArena.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H index 74acb81ac8e..0c105de185f 100644 --- a/Src/Base/AMReX_PArena.H +++ b/Src/Base/AMReX_PArena.H @@ -49,7 +49,7 @@ private: cudaMemPool_t m_pool; ) AMREX_HIP_OR_CUDA( - uint64_t m_old_release_threshold; + uint64_t m_old_release_threshold;, cuuint64_t m_old_release_threshold; ) #endif From cad73645711edb021fd380d5385ddacacbd0de7f Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 8 Jun 2024 21:01:56 -0400 Subject: [PATCH 5/6] cudaDevAttribute -> hipDeviceAttribute --- Src/Base/AMReX_GpuDevice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index d9bebfe4fcd..9aa2c9171a9 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -387,7 +387,7 @@ Device::initialize_gpu () } #ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT - hipDeviceGetAttribute(&memory_pools_supported, hipDevAttrMemoryPoolsSupported, device_id); + hipDeviceGetAttribute(&memory_pools_supported, hipDeviceAttributeMemoryPoolsSupported, device_id); #endif #elif defined(AMREX_USE_CUDA) From 401fde401d610b3584bdb7818a8cf9fe52539a92 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 8 Jun 2024 21:36:39 -0400 Subject: [PATCH 6/6] fix [[no_discard]] error --- Src/Base/AMReX_GpuDevice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index 9aa2c9171a9..193b73b7af9 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -387,7 +387,7 @@ Device::initialize_gpu () } #ifdef AMREX_GPU_STREAM_ALLOC_SUPPORT - hipDeviceGetAttribute(&memory_pools_supported, hipDeviceAttributeMemoryPoolsSupported, device_id); + AMREX_HIP_SAFE_CALL(hipDeviceGetAttribute(&memory_pools_supported, hipDeviceAttributeMemoryPoolsSupported, device_id)); #endif #elif defined(AMREX_USE_CUDA)