From c408a3275b5e223f4b8deeab23393808a07c182e Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Mon, 26 Feb 2024 12:38:29 -0800 Subject: [PATCH] [FEA]: Use `std::optional` instead of `thrust::optional` (#1464) We already require c++17, so there is no benefit on pulling in `thrust::optional` as a dependency. In addition thrust versiones its types with the cuda architecture, so using those types in API boundaries is not recommended. This could be a potentially breaking change if a user would explicitly pass in `thrust::optional` into those APIs which seems unlikely Authors: - Michael Schellenberger Costa (https://github.com/miscco) - Bradley Dice (https://github.com/bdice) Approvers: - Mark Harris (https://github.com/harrism) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1464 --- .../mr/device/cuda_async_memory_resource.hpp | 42 +++++- .../rmm/mr/device/pool_memory_resource.hpp | 142 ++++++++++++++++-- python/rmm/_lib/memory_resource.pyx | 20 +-- 3 files changed, 174 insertions(+), 30 deletions(-) diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 1bd761eb2..ac6b72076 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -23,11 +23,13 @@ #include #include +#include #include #include #include #include +#include #if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync #ifndef RMM_DISABLE_CUDA_MALLOC_ASYNC @@ -84,9 +86,43 @@ class cuda_async_memory_resource final : public device_memory_resource { * `cudaMemHandleTypeNone` for no IPC support. */ // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - cuda_async_memory_resource(thrust::optional initial_pool_size = {}, - thrust::optional release_threshold = {}, - thrust::optional export_handle_type = {}) + template , + thrust::optional>, + int> = 0> + [[deprecated("Use std::optional instead of thrust::optional.")]] // + explicit cuda_async_memory_resource( + Optional initial_pool_size, + Optional release_threshold = {}, + thrust::optional export_handle_type = {}) + : cuda_async_memory_resource(initial_pool_size.value_or(std::nullopt), + release_threshold.value_or(std::nullopt), + export_handle_type.value_or(std::nullopt)) + + { + } + + /** + * @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size + * and release threshold. + * + * If the pool size grows beyond the release threshold, unused memory held by the pool will be + * released at the next synchronization event. + * + * @throws rmm::logic_error if the CUDA version does not support `cudaMallocAsync` + * + * @param initial_pool_size Optional initial size in bytes of the pool. If no value is provided, + * initial pool size is half of the available GPU memory. + * @param release_threshold Optional release threshold size in bytes of the pool. If no value is + * provided, the release threshold is set to the total amount of memory on the current device. + * @param export_handle_type Optional `cudaMemAllocationHandleType` that allocations from this + * resource should support interprocess communication (IPC). Default is + * `cudaMemHandleTypeNone` for no IPC support. + */ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) + cuda_async_memory_resource(std::optional initial_pool_size = {}, + std::optional release_threshold = {}, + std::optional export_handle_type = {}) { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT // Check if cudaMallocAsync Memory pool supported diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 4f2d46a7c..cbd499021 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +110,37 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory + * pool using `upstream_mr`. + * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + template , + thrust::optional>, + int> = 0> + [[deprecated( + "Must specify initial_pool_size and use std::optional instead of thrust::optional.")]] // + explicit pool_memory_resource(Upstream* upstream_mr, + Optional initial_pool_size, + Optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource( + upstream_mr, initial_pool_size.value_or(0), maximum_pool_size.value_or(std::nullopt)) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory * pool using `upstream_mr`. @@ -127,12 +160,43 @@ class pool_memory_resource final */ [[deprecated("Must specify initial_pool_size")]] // explicit pool_memory_resource(Upstream* upstream_mr, - thrust::optional initial_pool_size = thrust::nullopt, - thrust::optional maximum_pool_size = thrust::nullopt) + std::optional initial_pool_size = std::nullopt, + std::optional maximum_pool_size = std::nullopt) : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) { } + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + template , + thrust::optional>, + int> = 0> + [[deprecated( + "Must specify initial_pool_size and use std::optional instead of thrust::optional.")]] // + explicit pool_memory_resource(Upstream& upstream_mr, + Optional initial_pool_size, + Optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource( + upstream_mr, initial_pool_size.value_or(0), maximum_pool_size.value_or(std::nullopt)) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. @@ -154,12 +218,39 @@ class pool_memory_resource final cuda::std::enable_if_t, int> = 0> [[deprecated("Must specify initial_pool_size")]] // explicit pool_memory_resource(Upstream2& upstream_mr, - thrust::optional initial_pool_size = thrust::nullopt, - thrust::optional maximum_pool_size = thrust::nullopt) + std::optional initial_pool_size = std::nullopt, + std::optional maximum_pool_size = std::nullopt) : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) { } + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available from the upstream resource. + */ + template , + thrust::optional>, + int> = 0> + [[deprecated("Use std::optional instead of thrust::optional.")]] // + explicit pool_memory_resource(Upstream* upstream_mr, + std::size_t initial_pool_size, + Optional maximum_pool_size) + : pool_memory_resource(upstream_mr, initial_pool_size, maximum_pool_size.value_or(std::nullopt)) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. @@ -177,7 +268,7 @@ class pool_memory_resource final */ explicit pool_memory_resource(Upstream* upstream_mr, std::size_t initial_pool_size, - thrust::optional maximum_pool_size = thrust::nullopt) + std::optional maximum_pool_size = std::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); return upstream_mr; @@ -191,6 +282,35 @@ class pool_memory_resource final initialize_pool(initial_pool_size, maximum_pool_size); } + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + template , + thrust::optional>, + int> = 0> + [[deprecated("Use std::optional instead of thrust::optional.")]] // + explicit pool_memory_resource(Upstream& upstream_mr, + std::size_t initial_pool_size, + Optional maximum_pool_size) + : pool_memory_resource(cuda::std::addressof(upstream_mr), + initial_pool_size, + maximum_pool_size.value_or(std::nullopt)) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. @@ -210,7 +330,7 @@ class pool_memory_resource final cuda::std::enable_if_t, int> = 0> explicit pool_memory_resource(Upstream2& upstream_mr, std::size_t initial_pool_size, - thrust::optional maximum_pool_size = thrust::nullopt) + std::optional maximum_pool_size = std::nullopt) : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size) { } @@ -312,7 +432,7 @@ class pool_memory_resource final * * @throws logic_error if @p initial_size is larger than @p maximum_size (if set). */ - void initialize_pool(std::size_t initial_size, thrust::optional maximum_size) + void initialize_pool(std::size_t initial_size, std::optional maximum_size) { current_pool_size_ = 0; // try_to_expand will set this if it succeeds maximum_pool_size_ = maximum_size; @@ -376,7 +496,7 @@ class pool_memory_resource final * @param stream The stream on which the memory is to be used. * @return block_type The allocated block */ - thrust::optional block_from_upstream(std::size_t size, cuda_stream_view stream) + std::optional block_from_upstream(std::size_t size, cuda_stream_view stream) { RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); @@ -384,10 +504,10 @@ class pool_memory_resource final try { void* ptr = get_upstream()->allocate_async(size, stream); - return thrust::optional{ + return std::optional{ *upstream_blocks_.emplace(static_cast(ptr), size, true).first}; } catch (std::exception const& e) { - return thrust::nullopt; + return std::nullopt; } } @@ -515,7 +635,7 @@ class pool_memory_resource final private: Upstream* upstream_mr_; // The "heap" to allocate the pool from std::size_t current_pool_size_{}; - thrust::optional maximum_pool_size_{}; + std::optional maximum_pool_size_{}; #ifdef RMM_POOL_TRACK_ALLOCATIONS std::set> allocated_blocks_; diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 690e2e338..0d68bc4bc 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -25,6 +25,7 @@ from libc.stddef cimport size_t from libc.stdint cimport int8_t, int64_t, uintptr_t from libcpp cimport bool from libcpp.memory cimport make_unique, unique_ptr +from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string @@ -76,19 +77,6 @@ cdef extern from *: # NOTE: Keep extern declarations in .pyx file as much as possible to avoid # leaking dependencies when importing RMM Cython .pxd files -cdef extern from "thrust/optional.h" namespace "thrust" nogil: - - struct nullopt_t: - pass - - cdef nullopt_t nullopt - - cdef cppclass optional[T]: - optional() - optional(T v) - - cdef optional[T] make_optional[T](T v) - cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass cuda_memory_resource(device_memory_resource): @@ -322,13 +310,13 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource): cdef optional[size_t] c_initial_pool_size = ( optional[size_t]() if initial_pool_size is None - else optional[size_t](initial_pool_size) + else optional[size_t]( initial_pool_size) ) cdef optional[size_t] c_release_threshold = ( optional[size_t]() if release_threshold is None - else optional[size_t](release_threshold) + else optional[size_t]( release_threshold) ) # If IPC memory handles are not supported, the constructor below will @@ -382,7 +370,7 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): c_maximum_pool_size = ( optional[size_t]() if maximum_pool_size is None - else make_optional[size_t](maximum_pool_size) + else optional[size_t]( maximum_pool_size) ) self.c_obj.reset( new pool_memory_resource[device_memory_resource](