From f2e4e534218fbb6bda0c156efa082f5623e9f291 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 9 Nov 2023 16:27:16 -0500 Subject: [PATCH 01/23] v24.02 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 4 ++-- .devcontainer/cuda11.8-pip/devcontainer.json | 4 ++-- .../cuda12.0-conda/devcontainer.json | 4 ++-- .devcontainer/cuda12.0-pip/devcontainer.json | 4 ++-- .github/workflows/build.yaml | 12 +++++------ .github/workflows/pr.yaml | 20 +++++++++---------- .github/workflows/test.yaml | 6 +++--- CMakeLists.txt | 2 +- VERSION | 2 +- ci/build_docs.sh | 2 +- ci/check_style.sh | 2 +- doxygen/Doxyfile | 2 +- fetch_rapids.cmake | 2 +- python/CMakeLists.txt | 2 +- python/docs/conf.py | 4 ++-- 15 files changed, 36 insertions(+), 36 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index f8546a341..663d0072c 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:23.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index b1e33502f..8bdde6a21 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json index 4834422eb..1191efca2 100644 --- a/.devcontainer/cuda12.0-conda/devcontainer.json +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:23.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json index f40db9312..f7f8ee591 100644 --- a/.devcontainer/cuda12.0-pip/devcontainer.json +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:23.12-cpp-llvm16-cuda12.0-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04" } }, 
"hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index be052881c..2f2b1464f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -68,7 +68,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -78,7 +78,7 @@ jobs: wheel-publish: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index aecbb9d9e..397ebfd15 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,40 +22,40 @@ jobs: - wheel-tests - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 with: build_type: pull-request conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -65,20 +65,20 @@ jobs: wheel-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 with: build_type: pull-request script: ci/build_wheel.sh wheel-tests: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: pull-request script: ci/test_wheel.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 with: build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 993852c94..a61209d0a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c883bc4c..e780ad5c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ include(rapids-find) project( RMM - VERSION 23.12.00 + VERSION 24.02.00 LANGUAGES CXX) # Write the version header diff --git a/VERSION b/VERSION index a193fff41..3c6c5e2b7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -23.12.00 +24.02.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 7a23b8c83..e11a9f35b 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -25,7 +25,7 @@ rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ rmm librmm -export RAPIDS_VERSION_NUMBER="23.12" +export RAPIDS_VERSION_NUMBER="24.02" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff 
--git a/ci/check_style.sh b/ci/check_style.sh index a01cf4dcc..9eed00298 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -14,7 +14,7 @@ rapids-dependency-file-generator \ rapids-mamba-retry env create --force -f env.yaml -n checks conda activate checks -FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/cmake-format-rapids-cmake.json +FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/cmake-format-rapids-cmake.json export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 4e8f339b3..e956411f2 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = RMM # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 23.12 +PROJECT_NUMBER = 24.02 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 899db0467..86d447acb 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake) endif() include(${CMAKE_CURRENT_BINARY_DIR}/RMM_RAPIDS.cmake) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 475fa9434..35f034421 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) -set(rmm_version 23.12.00) +set(rmm_version 24.02.00) include(../fetch_rapids.cmake) diff --git a/python/docs/conf.py b/python/docs/conf.py index a063b52eb..88bfee344 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -24,9 +24,9 @@ # built documents. # # The short X.Y version. -version = "23.12" +version = "24.02" # The full version, including alpha/beta/rc tags. -release = "23.12.00" +release = "24.02.00" # -- General configuration --------------------------------------------------- From da793c5471e2483232df718c8cb1534de8b7c0eb Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Wed, 29 Nov 2023 16:37:23 -0600 Subject: [PATCH 02/23] Fix Arena MR to support simultaneous access by PTDS and other streams (#1395) Replaces https://github.com/rapidsai/rmm/pull/1394; this is targeted for 24.02. Fixes https://github.com/rapidsai/rmm/issues/1393. In Spark, with the Spark RAPIDS accelerator using a cuDF 23.12 snapshot, we have an application that is reading ORC files, doing some light processing, and then writing ORC files. It consistently fails while doing the ORC write with: ``` terminate called after throwing an instance of 'rmm::logic_error' what(): RMM failure at:/home/jenkins/agent/workspace/jenkins-spark-rapids-jni_nightly-dev-594-cuda11/thirdparty/cudf/cpp/build/_deps/rmm-src/include/rmm/mr/device/arena_memory_resource.hpp:238: allocation not found ``` The underlying issue arises because Spark with the RAPIDS accelerator is using the arena allocator with per-thread default streams (PTDS) enabled.
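Roughly, the failing pattern looks like the sketch below. This is illustrative only, not part of this patch: the arena size is chosen small, and the exact point at which defragmentation triggers is simplified relative to the regression test added at the end of this diff.

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource upstream;
  // Keep the arena small so superblocks are scarce and defragmentation kicks in.
  rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream, 1u << 23};

  // This allocation lands in the calling thread's per-thread arena (PTDS).
  void* ptds_ptr = mr.allocate(256, rmm::cuda_stream_per_thread);

  // Work on explicit streams goes to stream arenas; enough churn here can
  // trigger a defragmentation that returns the superblock holding ptds_ptr
  // to the global arena and then hands it to a stream arena.
  rmm::cuda_stream stream;
  void* stream_ptr = mr.allocate(1u << 22, stream);
  mr.deallocate(stream_ptr, 1u << 22, stream);

  // Before this fix, the deallocation below could throw
  // rmm::logic_error "allocation not found": the superblock that owns
  // ptds_ptr now lives in a stream arena, which was never searched.
  mr.deallocate(ptds_ptr, 256, rmm::cuda_stream_per_thread);
  return 0;
}
```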
cuDF recently added its own stream pool, which is used in addition to per-thread default streams. It's now possible to use per-thread default streams along with another pool of streams. This means that it's possible for a superblock to move from a thread or stream arena back into the global arena during a defragmentation and then move down into another arena type. For instance, thread arena -> global arena -> stream arena. If this happens and there was an allocation from that superblock while it was in a thread arena, we now have to check whether the allocation is part of a stream arena. I added a test here. It tries to make sure that all the allocations are now in stream arenas; if there is a better way to do this, please let me know. Authors: - Thomas Graves (https://github.com/tgravescs) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Bradley Dice (https://github.com/bdice) - Rong Ou (https://github.com/rongou) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/rmm/pull/1395 --- .../rmm/mr/device/arena_memory_resource.hpp | 21 ++++++++++++- tests/mr/device/arena_mr_tests.cpp | 31 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 0dbd9c90e..929b8454f 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -235,7 +235,26 @@ class arena_memory_resource final : public device_memory_resource { } } - if (!global_arena_.deallocate(ptr, bytes)) { RMM_FAIL("allocation not found"); } + if (!global_arena_.deallocate(ptr, bytes)) { + // It's possible to use per thread default streams along with another pool of streams. + // This means that it's possible for an allocation to move from a thread or stream arena + // back into the global arena during a defragmentation and then move down into another arena + // type. For instance, thread arena -> global arena -> stream arena. If this happens and + // there was an allocation from it while it was a thread arena, we now have to check to + // see if the allocation is part of a stream arena, and vice versa. + // Only do this in exceptional cases to not affect performance and have to check all + // arenas all the time. + if (use_per_thread_arena(stream)) { + for (auto& stream_arena : stream_arenas_) { + if (stream_arena.second.deallocate(ptr, bytes)) { return; } + } + } else { + for (auto const& thread_arena : thread_arenas_) { + if (thread_arena.second->deallocate(ptr, bytes)) { return; } + } + } + RMM_FAIL("allocation not found"); + } } /** diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 48967d06a..7525cac9f 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -533,6 +533,37 @@ TEST_F(ArenaTest, Defragment) // NOLINT }()); } +TEST_F(ArenaTest, PerThreadToStreamDealloc) // NOLINT +{ + // This is testing that deallocation of a ptr still works when + // it was originally allocated in a superblock that was in a thread + // arena that then moved to global arena during a defragmentation + // and then moved to a stream arena.
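+ // (Sizing note: the arena below has room for exactly two superblocks,
+ // so once the thread arena and the stream arena each own one, the
+ // global arena is empty and the next superblock-sized allocation must
+ // trigger the defragmentation described above.)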
+ auto const arena_size = superblock::minimum_size * 2; + arena_mr mr(rmm::mr::get_current_device_resource(), arena_size); + // Create an allocation from a per thread arena + void* thread_ptr = mr.allocate(256, rmm::cuda_stream_per_thread); + // Create an allocation in a stream arena to force global arena + // to be empty + cuda_stream stream{}; + void* ptr = mr.allocate(32_KiB, stream); + mr.deallocate(ptr, 32_KiB, stream); + // at this point the global arena doesn't have any superblocks so + // the next allocation causes defrag. Defrag causes all superblocks + // from the thread and stream arena allocated above to go back to + // global arena and it allocates one superblock to the stream arena. + auto* ptr1 = mr.allocate(superblock::minimum_size, rmm::cuda_stream_view{}); + // Allocate again to make sure all superblocks from + // global arena are owned by a stream arena instead of a thread arena + // or the global arena. + auto* ptr2 = mr.allocate(32_KiB, rmm::cuda_stream_view{}); + // The original thread ptr is now owned by a stream arena so make + // sure deallocation works. + mr.deallocate(thread_ptr, 256, rmm::cuda_stream_per_thread); + mr.deallocate(ptr1, superblock::minimum_size, rmm::cuda_stream_view{}); + mr.deallocate(ptr2, 32_KiB, rmm::cuda_stream_view{}); +} + TEST_F(ArenaTest, DumpLogOnFailure) // NOLINT { arena_mr mr{rmm::mr::get_current_device_resource(), 1_MiB, true}; From ee3817cdde594f19bc665d524b0a3d1d1b588da9 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 30 Nov 2023 10:38:02 +1000 Subject: [PATCH 03/23] Fix else-after-throw clang tidy error (#1391) Updates code for `RMM_CUDA_TRY_ALLOC` macro in `detail/error.hpp` to eliminate a clang-tidy error. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Rong Ou (https://github.com/rongou) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/rmm/pull/1391 --- include/rmm/detail/error.hpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 6f74dc0ea..bce0d1389 100644 --- a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -230,11 +230,8 @@ class out_of_range : public std::out_of_range { cudaGetLastError(); \ auto const msg = std::string{"CUDA error at: "} + __FILE__ + ":" + RMM_STRINGIFY(__LINE__) + \ ": " + cudaGetErrorName(error) + " " + cudaGetErrorString(error); \ - if (cudaErrorMemoryAllocation == error) { \ - throw rmm::out_of_memory{msg}; \ - } else { \ - throw rmm::bad_alloc{msg}; \ - } \ + if (cudaErrorMemoryAllocation == error) { throw rmm::out_of_memory{msg}; } \ + throw rmm::bad_alloc{msg}; \ } \ } while (0) From d8579b70d7b2ee4714b4e011f266863c3095f0da Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Nov 2023 10:47:18 -0600 Subject: [PATCH 04/23] Update to fmt 10.1.1 and spdlog 1.12.0. (#1374) This PR updates to fmt 10.1.1 and spdlog 1.12. Depends on https://github.com/rapidsai/rapids-cmake/pull/473. 
Closes https://github.com/rapidsai/rmm/issues/1356 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/rmm/pull/1374 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/environments/all_cuda-120_arch-x86_64.yaml | 4 ++-- conda/recipes/librmm/conda_build_config.yaml | 4 ++-- dependencies.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 3a5cb90af..09f3ebeed 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -16,7 +16,7 @@ dependencies: - cxx-compiler - cython>=3.0.0 - doxygen=1.9.1 -- fmt>=9.1.0,<10 +- fmt>=10.1.1,<11 - gcc_linux-64=11.* - gcovr>=5.0 - graphviz @@ -34,7 +34,7 @@ dependencies: - pytest-cov - python>=3.9,<3.11 - scikit-build>=0.13.1 -- spdlog>=1.11.0,<1.12 +- spdlog>=1.12.0,<1.13 - sphinx - sphinx-copybutton - sphinx-markdown-tables diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index f87564b74..40f1871c2 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cxx-compiler - cython>=3.0.0 - doxygen=1.9.1 -- fmt>=9.1.0,<10 +- fmt>=10.1.1,<11 - gcc_linux-64=11.* - gcovr>=5.0 - graphviz @@ -32,7 +32,7 @@ dependencies: - pytest-cov - python>=3.9,<3.11 - scikit-build>=0.13.1 -- spdlog>=1.11.0,<1.12 +- spdlog>=1.12.0,<1.13 - sphinx - sphinx-copybutton - sphinx-markdown-tables diff --git a/conda/recipes/librmm/conda_build_config.yaml b/conda/recipes/librmm/conda_build_config.yaml index 59c0a210e..ed58ac507 100644 --- a/conda/recipes/librmm/conda_build_config.yaml +++ b/conda/recipes/librmm/conda_build_config.yaml @@ -14,13 +14,13 @@ cmake_version: - ">=3.26.4" fmt_version: - - ">=9.1.0,<10" + - ">=10.1.1,<11" gtest_version: - ">=1.13.0" spdlog_version: - - ">=1.11.0,<1.12" + - ">=1.12.0,<1.13" sysroot_version: - "2.17" diff --git a/dependencies.yaml b/dependencies.yaml index af3f49ef0..805159e62 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -71,8 +71,8 @@ dependencies: packages: - c-compiler - cxx-compiler - - fmt>=9.1.0,<10 - - spdlog>=1.11.0,<1.12 + - fmt>=10.1.1,<11 + - spdlog>=1.12.0,<1.13 - python>=3.9,<3.11 - output_types: pyproject packages: From cd63a08d3cfb896894a54de9c9fcff5be4b120a0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 Dec 2023 12:36:18 -0800 Subject: [PATCH 05/23] Remove RMM_BUILD_WHEELS and standardize Python builds (#1401) Some minor simplification in advance of the scikit-build-core migration to better align wheel and non-wheel Python builds. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/rmm/pull/1401 --- ci/build_wheel.sh | 2 +- python/CMakeLists.txt | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 6d887e5d2..b2d953361 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -31,7 +31,7 @@ fi cd "${package_dir}" -SKBUILD_CONFIGURE_OPTIONS="-DRMM_BUILD_WHEELS=ON" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check mkdir -p final_dist python -m auditwheel repair -w final_dist dist/* diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 35f034421..dc18af3fd 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -29,7 +29,6 @@ project( option(FIND_RMM_CPP "Search for existing RMM C++ installations before defaulting to local files" OFF) -option(RMM_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) # If the user requested it we attempt to find RMM. if(FIND_RMM_CPP) @@ -41,16 +40,9 @@ endif() if(NOT rmm_FOUND) set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) + set(CUDA_STATIC_RUNTIME ON) - set(_exclude_from_all "") - if(RMM_BUILD_WHEELS) - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - # Don't install the rmm C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../ rmm-cpp ${_exclude_from_all}) + add_subdirectory(../ rmm-cpp EXCLUDE_FROM_ALL) endif() include(rapids-cython) From 53c80437d80d03b7e07e864a57c9ace55d8736f2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 11 Dec 2023 11:34:08 -0600 Subject: [PATCH 06/23] Use latest cuda-python within CUDA major version. (#1406) This PR updates cuda-python. The CUDA 11 build was locked to an outdated version (11.7.1). This matches the specifications in dependencies.yaml and also cudf recipes. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/rmm/pull/1406 --- conda/recipes/rmm/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index b3457d922..477927e27 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -53,10 +53,10 @@ requirements: - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python ==11.7.1 + - cuda-python >=11.7.1,<12.0a0 {% else %} - cuda-cudart-dev - - cuda-python ==12.0.0 + - cuda-python >=12.0,<13.0a0 {% endif %} - cython >=3.0.0 - librmm ={{ version }} From 57ed533b4f3cf3ccff9b9d2412c445212ebf0861 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 13 Dec 2023 15:08:50 -0800 Subject: [PATCH 07/23] Switch to scikit-build-core (#1287) Contributes to rapidsai/build-planning#2 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/rmm/pull/1287 --- README.md | 2 +- build.sh | 2 +- .../all_cuda-118_arch-x86_64.yaml | 3 +-- .../all_cuda-120_arch-x86_64.yaml | 3 +-- conda/recipes/rmm/meta.yaml | 4 +--- dependencies.yaml | 8 +++---- python/CMakeLists.txt | 8 ++----- python/pyproject.toml | 22 +++++++++++-------- python/setup.py | 11 ---------- 9 files changed, 23 insertions(+), 40 deletions(-) delete mode 100644 python/setup.py diff --git a/README.md b/README.md index 56bc86632..a5c626253 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ CUDA/GPU requirements: You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) Python requirements: -* `scikit-build` +* `scikit-build-core` * `cuda-python` * `cython` diff --git a/build.sh b/build.sh index ed37b5973..ec08fa402 100755 --- a/build.sh +++ b/build.sh @@ -176,5 +176,5 @@ fi # Build and install the rmm Python package if (( NUMARGS == 0 )) || hasArg rmm; then echo "building and installing rmm..." 
- SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" python -m pip install --no-build-isolation --no-deps ${REPODIR}/python + SKBUILD_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS}" python -m pip install --no-build-isolation --no-deps ${REPODIR}/python fi diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 09f3ebeed..595c9fae3 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -33,12 +33,11 @@ dependencies: - pytest - pytest-cov - python>=3.9,<3.11 -- scikit-build>=0.13.1 +- scikit-build-core>=0.7.0 - spdlog>=1.12.0,<1.13 - sphinx - sphinx-copybutton - sphinx-markdown-tables - sphinx_rtd_theme - sysroot_linux-64==2.17 -- tomli name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 40f1871c2..2f9c9297c 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -31,12 +31,11 @@ dependencies: - pytest - pytest-cov - python>=3.9,<3.11 -- scikit-build>=0.13.1 +- scikit-build-core>=0.7.0 - spdlog>=1.12.0,<1.13 - sphinx - sphinx-copybutton - sphinx-markdown-tables - sphinx_rtd_theme - sysroot_linux-64==2.17 -- tomli name: all_cuda-120_arch-x86_64 diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index 477927e27..1e50853b6 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -61,9 +61,7 @@ requirements: - cython >=3.0.0 - librmm ={{ version }} - python - - scikit-build >=0.13.1 - - setuptools >=61.0.0 - - tomli # [py<311] + - scikit-build-core >=0.7.0 run: {% if cuda_major == "11" %} - cudatoolkit diff --git a/dependencies.yaml b/dependencies.yaml index 805159e62..336852d53 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -65,19 +65,17 @@ dependencies: - &cmake_ver cmake>=3.26.4 - cython>=3.0.0 - ninja - - scikit-build>=0.13.1 - - tomli - output_types: conda packages: - c-compiler - cxx-compiler - fmt>=10.1.1,<11 + - scikit-build-core>=0.7.0 - spdlog>=1.12.0,<1.13 - python>=3.9,<3.11 - - output_types: pyproject + - output_types: [requirements, pyproject] packages: - - wheel - - setuptools>=61.0.0 + - scikit-build-core[pyproject]>=0.7.0 specific: - output_types: conda matrices: diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index dc18af3fd..635c4d3b5 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -21,11 +21,7 @@ include(../fetch_rapids.cmake) project( rmm-python VERSION ${rmm_version} - LANGUAGES # TODO: Building Python extension modules via the python_extension_module requires the C - # language to be enabled here. The test project that is built in scikit-build to verify - # various linking options for the python library is hardcoded to build with C, so until - # that is fixed we need to keep C. - C CXX) + LANGUAGES CXX) option(FIND_RMM_CPP "Search for existing RMM C++ installations before defaulting to local files" OFF) @@ -45,7 +41,7 @@ if(NOT rmm_FOUND) add_subdirectory(../ rmm-cpp EXCLUDE_FROM_ALL) endif() -include(rapids-cython) +include(rapids-cython-core) rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") diff --git a/python/pyproject.toml b/python/pyproject.toml index 751d9a674..1e901b1ab 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -13,16 +13,13 @@ # limitations under the License. 
[build-system] -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" requires = [ "cmake>=3.26.4", "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", - "scikit-build>=0.13.1", - "setuptools>=61.0.0", - "tomli", - "wheel", + "scikit-build-core[pyproject]>=0.7.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../dependencies.yaml and run `rapids-dependency-file-generator`. [project] @@ -112,8 +109,15 @@ skip = [ "__init__.py", ] -[tool.setuptools] -license-files = ["LICENSE"] +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.minimum-version = "3.26.4" +ninja.make-fallback = true +sdist.reproducible = true +wheel.packages = ["rmm"] -[tool.setuptools.dynamic] -version = {file = "rmm/VERSION"} +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "rmm/VERSION" +regex = "(?P.*)" diff --git a/python/setup.py b/python/setup.py deleted file mode 100644 index af5750044..000000000 --- a/python/setup.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. - -from setuptools import find_packages -from skbuild import setup - -packages = find_packages(include=["rmm*"]) -setup( - packages=packages, - package_data={key: ["VERSION", "*.pxd"] for key in packages}, - zip_safe=False, -) From 362b9352bb71145ef6c449697dd4e81ecffb2475 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 13 Dec 2023 19:07:34 -0600 Subject: [PATCH 08/23] Define python dependency range as a matrix fallback. (#1409) This PR moves the definition of `python>=3.9,<3.11` into the `py_version` dependency list, under the empty (fallback) matrix. This change aligns RMM's `dependencies.yaml` with other RAPIDS repositories. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/rmm/pull/1409 --- dependencies.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 336852d53..0be702c60 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -11,6 +11,7 @@ files: - cudatoolkit - develop - docs + - py_version - run - test_python test_python: @@ -72,7 +73,6 @@ dependencies: - fmt>=10.1.1,<11 - scikit-build-core>=0.7.0 - spdlog>=1.12.0,<1.13 - - python>=3.9,<3.11 - output_types: [requirements, pyproject] packages: - scikit-build-core[pyproject]>=0.7.0 @@ -195,6 +195,9 @@ dependencies: py: "3.10" packages: - python=3.10 + - matrix: + packages: + - python>=3.9,<3.11 run: common: - output_types: [conda, requirements, pyproject] From 0b931f6615ee31d54b26810a1eb5223f4ac57e31 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 14 Dec 2023 12:29:59 +0000 Subject: [PATCH 09/23] Correct signatures for torch allocator plug in (#1407) Since https://github.com/pytorch/pytorch/pull/91398, the signature of the pluggable allocate and deallocate functions must accept the device id. The current version only accepts a device id for allocate, which means that when using a stream ordered allocator with devices other than device zero, we pass an invalid stream into the deallocation function. To fix this, adapt the signature to match the one pytorch expects. Now, since we have the device available during allocation and deallocation, we would like to use that device to obtain the appropriate memory resource. 
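For reference, here are the two hook signatures side by side: the old form as exposed by the removed `torch_allocator.pyx`, and the new form that `CUDAPluggableAllocator` loads via `dlopen`. Both are taken from the diff below; the `_old` suffixes are added here only to keep the two sets distinct.

```cpp
#include <cstddef>     // std::size_t
#include <sys/types.h> // ssize_t (POSIX)

// Old hooks (removed torch_allocator.pyx, rendered as plain C declarations):
// deallocate never received the device id, so PyTorch's four-argument
// (ptr, size, device, stream) call left the device id in the slot this hook
// interpreted as the stream.
extern "C" void* allocate_old(ssize_t size, int device, void* stream);
extern "C" void deallocate_old(void* ptr, ssize_t size, void* stream);

// New hooks (_torch_allocator.cpp, below): both calls accept the device id,
// matching what CUDAPluggableAllocator passes since pytorch/pytorch#91398.
extern "C" void* allocate(std::size_t size, int device, void* stream);
extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream);
```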
Unfortunately, since RMM's cuda_device_id does not have a nullary constructor, we can't use it in Cython without some hacky workarounds. However, since we don't actually need to build a Python module, but rather just a single shared library that offers two extern "C" functions, let's just write our allocator hooks directly in C++. - Closes #1405 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/rmm/pull/1407 --- .gitignore | 1 + python/rmm/_lib/CMakeLists.txt | 13 ++++-- python/rmm/_lib/_torch_allocator.cpp | 60 ++++++++++++++++++++++++++++ python/rmm/_lib/torch_allocator.pyx | 24 ----------- python/rmm/allocators/torch.py | 9 +++-- 5 files changed, 76 insertions(+), 31 deletions(-) create mode 100644 python/rmm/_lib/_torch_allocator.cpp delete mode 100644 python/rmm/_lib/torch_allocator.pyx diff --git a/.gitignore b/.gitignore index af14a9534..f4993502b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ rmm.egg-info/ python/build python/*/build python/rmm/**/_lib/**/*.cpp +!python/rmm/_lib/_torch_allocator.cpp python/rmm/**/_lib/**/*.h python/rmm/**/_lib/.nfs* python/rmm/_cuda/*.cpp diff --git a/python/rmm/_lib/CMakeLists.txt b/python/rmm/_lib/CMakeLists.txt index 852dd87c4..980217f0c 100644 --- a/python/rmm/_lib/CMakeLists.txt +++ b/python/rmm/_lib/CMakeLists.txt @@ -12,12 +12,17 @@ # the License. # ============================================================================= -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx - torch_allocator.pyx) +set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX) -# The cdef public functions in this file need to have a C ABI -target_compile_definitions(torch_allocator PRIVATE CYTHON_EXTERN_C=extern\ "C") + +add_library(_torch_allocator SHARED _torch_allocator.cpp) +# Want the output to be called _torch_allocator.so +set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so") +target_link_libraries(_torch_allocator PRIVATE rmm::rmm) +cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}" + OUTPUT_VARIABLE _torch_allocator_location) +install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}") diff --git a/python/rmm/_lib/_torch_allocator.cpp b/python/rmm/_lib/_torch_allocator.cpp new file mode 100644 index 000000000..5e5b2dcb4 --- /dev/null +++ b/python/rmm/_lib/_torch_allocator.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +// These signatures must match those required by CUDAPluggableAllocator in +// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h +// Since the loading is done at runtime via dlopen, no error checking +// can be performed for mismatching signatures. + +/** + * @brief Allocate memory of at least \p size bytes. + * + * @throws rmm::bad_alloc When the requested allocation cannot be satisfied. + * + * @param size The number of bytes to allocate + * @param device The device whose memory resource one should use + * @param stream CUDA stream to perform allocation on + * @return Pointer to the newly allocated memory + */ +extern "C" void* allocate(std::size_t size, int device, void* stream) +{ + rmm::cuda_device_id const device_id{device}; + rmm::cuda_set_device_raii with_device{device_id}; + auto mr = rmm::mr::get_per_device_resource(device_id); + return mr->allocate(size, rmm::cuda_stream_view{static_cast(stream)}); +} + +/** + * @brief Deallocate memory pointed to by \p ptr. + * + * @param ptr Pointer to be deallocated + * @param size The number of bytes in the allocation + * @param device The device whose memory resource one should use + * @param stream CUDA stream to perform deallocation on + */ +extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream) +{ + rmm::cuda_device_id const device_id{device}; + rmm::cuda_set_device_raii with_device{device_id}; + auto mr = rmm::mr::get_per_device_resource(device_id); + mr->deallocate(ptr, size, rmm::cuda_stream_view{static_cast(stream)}); +} diff --git a/python/rmm/_lib/torch_allocator.pyx b/python/rmm/_lib/torch_allocator.pyx deleted file mode 100644 index 12dc9fe11..000000000 --- a/python/rmm/_lib/torch_allocator.pyx +++ /dev/null @@ -1,24 +0,0 @@ -from cuda.ccudart cimport cudaStream_t - -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.memory_resource cimport device_memory_resource -from rmm._lib.per_device_resource cimport get_current_device_resource - - -cdef public void* allocate( - ssize_t size, int device, void* stream -) except * with gil: - cdef device_memory_resource* mr = get_current_device_resource() - cdef cuda_stream_view stream_view = cuda_stream_view( - (stream) - ) - return mr[0].allocate(size, stream_view) - -cdef public void deallocate( - void* ptr, ssize_t size, void* stream -) except * with gil: - cdef device_memory_resource* mr = get_current_device_resource() - cdef cuda_stream_view stream_view = cuda_stream_view( - (stream) - ) - mr[0].deallocate(ptr, size, stream_view) diff --git a/python/rmm/allocators/torch.py b/python/rmm/allocators/torch.py index 65b310a89..35081f7f5 100644 --- a/python/rmm/allocators/torch.py +++ b/python/rmm/allocators/torch.py @@ -16,11 +16,14 @@ except ImportError: rmm_torch_allocator = None else: - import rmm._lib.torch_allocator + import pathlib - _alloc_free_lib_path = rmm._lib.torch_allocator.__file__ + sofile = ( + pathlib.Path(__file__).parent.parent / "_lib" / "_torch_allocator.so" + ) rmm_torch_allocator = CUDAPluggableAllocator( - _alloc_free_lib_path, + str(sofile.absolute()), alloc_fn_name="allocate", free_fn_name="deallocate", ) + del pathlib, sofile From ec7b164419809afe7edfcecf89c863681d373d7b Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 14 Dec 2023 09:41:51 -0500 Subject: [PATCH 10/23] Update CODEOWNERS (#1410) --- .github/CODEOWNERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/.github/CODEOWNERS b/.github/CODEOWNERS index 3c02b406c..c9a4cba83 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -14,5 +14,4 @@ python/ @rapidsai/rmm-python-codeowners .github/ @rapidsai/ops-codeowners ci/ @rapidsai/ops-codeowners conda/ @rapidsai/ops-codeowners -**/Dockerfile @rapidsai/ops-codeowners -**/.dockerignore @rapidsai/ops-codeowners +dependencies.yaml @rapidsai/ops-codeowners From cf146178b532dbacc88fdf5cadb1b351f0bdec5b Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Mon, 18 Dec 2023 15:36:47 +1000 Subject: [PATCH 11/23] Update GPU support docs to drop Pascal (#1413) We are dropping Pascal support in 24.02 (see https://github.com/rapidsai/rapids-cmake/pull/482) This PR changes the way we document GPU support in RMM to explain what is tested and supported rather than what is required (since it may work on earlier hardware than we test/support). Authors: - Mark Harris (https://github.com/harrism) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1413 --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a5c626253..5aa7daab9 100644 --- a/README.md +++ b/README.md @@ -61,10 +61,12 @@ Compiler requirements: CUDA/GPU requirements: -* CUDA 11.4+ -* Pascal architecture or better +* CUDA 11.4+. You can obtain CUDA from + [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) -You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads) +GPU Support: +* RMM is tested and supported only on Volta architecture and newer (Compute Capability 7.0+). It + may work on earlier architectures. Python requirements: * `scikit-build-core` From 77b55003b5ae598b7c26553ff39c5dde369c73d8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 18 Dec 2023 14:47:17 -0800 Subject: [PATCH 12/23] Remove HTML builds of librmm (#1415) We no longer require separate librmm doc builds since they are incorporated into the Sphinx build now. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/rmm/pull/1415 --- ci/build_docs.sh | 2 -- doxygen/Doxyfile | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index e11a9f35b..64c17fec8 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -31,8 +31,6 @@ export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" pushd doxygen doxygen Doxyfile -mkdir -p "${RAPIDS_DOCS_DIR}/librmm/html" -mv html/* "${RAPIDS_DOCS_DIR}/librmm/html" popd rapids-logger "Build Python docs" diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index e956411f2..149603f59 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -1135,7 +1135,7 @@ IGNORE_PREFIX = # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. -GENERATE_HTML = YES +GENERATE_HTML = NO # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of From a4dd4f5001a25b0fc1a1e35380c9c8564f3f7354 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Dec 2023 10:03:19 -0600 Subject: [PATCH 13/23] Update to CCCL 2.2.0. (#1404) This PR updates RMM to CCCL 2.2.0. Do not merge until all of RAPIDS is ready to update. Depends on https://github.com/rapidsai/rapids-cmake/pull/495. 
Replaces #1247. Authors: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/rmm/pull/1404 --- CMakeLists.txt | 21 ++++----------- .../{get_libcudacxx.cmake => get_cccl.cmake} | 10 +++---- cmake/thirdparty/get_thrust.cmake | 26 ------------------- scripts/load-pretty-printers.in | 2 +- 4 files changed, 11 insertions(+), 48 deletions(-) rename cmake/thirdparty/{get_libcudacxx.cmake => get_cccl.cmake} (75%) delete mode 100644 cmake/thirdparty/get_thrust.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 795969a5c..744307a06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,8 +70,7 @@ rapids_cpm_init() include(cmake/thirdparty/get_fmt.cmake) include(cmake/thirdparty/get_spdlog.cmake) -include(cmake/thirdparty/get_libcudacxx.cmake) -include(cmake/thirdparty/get_thrust.cmake) +include(cmake/thirdparty/get_cccl.cmake) # ################################################################################################## # * library targets -------------------------------------------------------------------------------- @@ -90,8 +89,7 @@ else() target_link_libraries(rmm INTERFACE CUDA::cudart) endif() -target_link_libraries(rmm INTERFACE libcudacxx::libcudacxx) -target_link_libraries(rmm INTERFACE rmm::Thrust) +target_link_libraries(rmm INTERFACE CCCL::CCCL) target_link_libraries(rmm INTERFACE fmt::fmt-header-only) target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only) target_link_libraries(rmm INTERFACE dl) @@ -152,20 +150,12 @@ The goal of the [RMM](https://github.com/rapidsai/rmm) is to provide: A collection of data structures that use the interface for memory allocation ]=]) -set(code_string - [=[ -if(NOT TARGET rmm::Thrust) - thrust_create_target(rmm::Thrust FROM_OPTIONS) -endif() -]=]) - rapids_export( INSTALL rmm EXPORT_SET rmm-exports GLOBAL_TARGETS rmm NAMESPACE rmm:: - DOCUMENTATION doc_string - FINAL_CODE_BLOCK code_string) + DOCUMENTATION doc_string) # ################################################################################################## # * build export ----------------------------------------------------------------------------------- @@ -175,8 +165,7 @@ rapids_export( EXPORT_SET rmm-exports GLOBAL_TARGETS rmm NAMESPACE rmm:: - DOCUMENTATION doc_string - FINAL_CODE_BLOCK code_string) + DOCUMENTATION doc_string) # ################################################################################################## # * make documentation ----------------------------------------------------------------------------- @@ -197,6 +186,6 @@ add_custom_target( # * make gdb helper scripts ------------------------------------------------------------------------ # optionally assemble Thrust pretty-printers -if(Thrust_SOURCE_DIR) +if(CCCL_SOURCE_DIR) configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY) endif() diff --git a/cmake/thirdparty/get_libcudacxx.cmake b/cmake/thirdparty/get_cccl.cmake similarity index 75% rename from cmake/thirdparty/get_libcudacxx.cmake rename to cmake/thirdparty/get_cccl.cmake index 14b0d492f..ca6a8654e 100644 --- a/cmake/thirdparty/get_libcudacxx.cmake +++ b/cmake/thirdparty/get_cccl.cmake @@ -12,12 +12,12 @@ # the License. 
# ============================================================================= -# Use CPM to find or clone libcudacxx -function(find_and_configure_libcudacxx) +# Use CPM to find or clone CCCL +function(find_and_configure_cccl) - include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) - rapids_cpm_libcudacxx(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports) + include(${rapids-cmake-dir}/cpm/cccl.cmake) + rapids_cpm_cccl(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports) endfunction() -find_and_configure_libcudacxx() +find_and_configure_cccl() diff --git a/cmake/thirdparty/get_thrust.cmake b/cmake/thirdparty/get_thrust.cmake deleted file mode 100644 index f4125e512..000000000 --- a/cmake/thirdparty/get_thrust.cmake +++ /dev/null @@ -1,26 +0,0 @@ -# ============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -# Use CPM to find or clone thrust -function(find_and_configure_thrust) - - include(${rapids-cmake-dir}/cpm/thrust.cmake) - rapids_cpm_thrust( - NAMESPACE rmm - BUILD_EXPORT_SET rmm-exports - INSTALL_EXPORT_SET rmm-exports) - -endfunction() - -find_and_configure_thrust() diff --git a/scripts/load-pretty-printers.in b/scripts/load-pretty-printers.in index bd59968cc..a473bc5ad 100644 --- a/scripts/load-pretty-printers.in +++ b/scripts/load-pretty-printers.in @@ -1,2 +1,2 @@ -source @Thrust_SOURCE_DIR@/scripts/gdb-pretty-printers.py +source @CCCL_SOURCE_DIR@/thrust/scripts/gdb-pretty-printers.py source @PROJECT_SOURCE_DIR@/scripts/gdb-pretty-printers.py From e2dc7279d9a6bed2860697bcb5155a0ee6e57efe Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 2 Jan 2024 15:33:45 -0600 Subject: [PATCH 14/23] Update dependencies.yaml to support CUDA 12.*. (#1414) This PR updates `dependencies.yaml` so that generic CUDA 12.* dependencies can be specified with a glob, like `cuda: "12.*"`. This feature requires `rapids-dependency-file-generator>=1.8.0`, so the pre-commit hook has been updated. I have not yet added support for a specific CUDA version like 12.1 or 12.2. That can be done separately. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Mark Harris (https://github.com/harrism) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/rmm/pull/1414 --- .pre-commit-config.yaml | 2 +- dependencies.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7f095025a..4df64d11a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.5.1 + rev: v1.8.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/dependencies.yaml b/dependencies.yaml index 0be702c60..99bb73808 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -109,7 +109,7 @@ dependencies: - output_types: [conda, requirements, pyproject] matrices: - matrix: - cuda: "12.0" + cuda: "12.*" packages: - &cuda_python12 cuda-python>=12.0,<13.0a0 - matrix: # All CUDA 11 versions @@ -208,7 +208,7 @@ dependencies: - output_types: [conda, requirements, pyproject] matrices: - matrix: - cuda: "12.0" + cuda: "12.*" packages: - *cuda_python12 - matrix: # All CUDA 11 versions From 9265976815ffe00ab24eb23d0746c62918d96213 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 8 Jan 2024 16:02:33 -0600 Subject: [PATCH 15/23] remove references to setup.py in docs (#1420) Removes remaining references to `setup.py` in documentation. This project no longer has a `setup.py` as of its switch to `pyproject.toml` + `scikit-build-core` (see #1287, #1300). Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/rmm/pull/1420 --- CONTRIBUTING.md | 2 +- README.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 61ac5873a..e24b94f14 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,7 +37,7 @@ are required. To set up a development environment, follow the steps in the [README](https://github.com/rapidsai/rmm/blob/main/README.md) for cloning the repository and creating the conda environment. Once the environment is created, you can build and install RMM using ```bash -$ python setup.py develop +$ python -m pip install ./python ``` This command will build the RMM Python library inside the clone and automatically make it importable when running Python anywhere on your machine. Remember, if you are unsure about anything, don't hesitate to comment on issues diff --git a/README.md b/README.md index 5aa7daab9..e033ef56f 100644 --- a/README.md +++ b/README.md @@ -127,8 +127,7 @@ $ make test - Build, install, and test the `rmm` python package, in the `python` folder: ```bash -$ python setup.py build_ext --inplace -$ python setup.py install +$ python -m pip install -e ./python $ pytest -v ``` From 5c30e876e63f0e6972fcd0982803490cb90dd9c6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 11 Jan 2024 10:23:45 -0600 Subject: [PATCH 16/23] Refactor CUDA versions in dependencies.yaml. (#1422) This is a follow-up PR to #1414. I thought some more about how to separate `cuda-version` pinnings (which control the CUDA version we use to build and test in conda) from actual CUDA Toolkit package dependencies (which we can handle according to only the major version 11/12). 
I discussed this PR on a call with @jameslamb in the context of upgrading to CUDA 12.2 (https://github.com/rapidsai/build-planning/issues/6). This set of changes is mostly important for conda builds/tests, since `cuda-version` only controls conda. The pip wheel build/test process is unchanged, since its CUDA versions are controlled by the `shared-workflows` CI images. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - https://github.com/jakirkham - Vyas Ramasubramani (https://github.com/vyasr) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/rmm/pull/1422 --- dependencies.yaml | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 99bb73808..4d1a0ec49 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -8,7 +8,8 @@ files: includes: - build - checks - - cudatoolkit + - cuda + - cuda_version - develop - docs - py_version @@ -17,13 +18,13 @@ files: test_python: output: none includes: - - cudatoolkit + - cuda_version - py_version - test_python test_cpp: output: none includes: - - cudatoolkit + - cuda_version - test_cpp checks: output: none @@ -33,7 +34,7 @@ files: docs: output: none includes: - - cudatoolkit + - cuda_version - docs - py_version py_build: @@ -102,9 +103,8 @@ dependencies: packages: - nvcc_linux-aarch64=11.8 - matrix: - cuda: "12.0" + cuda: "12.*" packages: - - cuda-version=12.0 - cuda-nvcc - output_types: [conda, requirements, pyproject] matrices: @@ -126,7 +126,7 @@ dependencies: - output_types: conda packages: - &doxygen doxygen=1.9.1 - cudatoolkit: + cuda_version: specific: - output_types: conda matrices: @@ -134,31 +134,37 @@ dependencies: cuda: "11.2" packages: - cuda-version=11.2 - - cudatoolkit - matrix: cuda: "11.4" packages: - cuda-version=11.4 - - cudatoolkit - matrix: cuda: "11.5" packages: - cuda-version=11.5 - - cudatoolkit - matrix: cuda: "11.6" packages: - cuda-version=11.6 - - cudatoolkit - matrix: cuda: "11.8" packages: - cuda-version=11.8 - - cudatoolkit - matrix: cuda: "12.0" packages: - cuda-version=12.0 + cuda: + specific: + - output_types: conda + matrices: + - matrix: + cuda: "11.*" + packages: + - cudatoolkit + - matrix: + cuda: "12.*" + packages: develop: common: - output_types: [conda, requirements] From 40ce29509a1e33e1fdb8e786413f06d35c26ea80 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 12 Jan 2024 11:59:05 -0500 Subject: [PATCH 17/23] Remove usages of rapids-env-update (#1423) Reference: https://github.com/rapidsai/ops/issues/2766 Replace rapids-env-update with rapids-configure-conda-channels, rapids-configure-sccache, and rapids-date-string. 
Authors:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/rmm/pull/1423
---
 ci/build_cpp.sh    | 6 +++++-
 ci/build_python.sh | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index d36fcbbe7..7329d4a34 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -3,7 +3,11 @@
 
 set -euo pipefail
 
-source rapids-env-update
+rapids-configure-conda-channels
+
+source rapids-configure-sccache
+
+source rapids-date-string
 
 export CMAKE_GENERATOR=Ninja
 
diff --git a/ci/build_python.sh b/ci/build_python.sh
index c1c30da2a..b197b1ae1 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -3,7 +3,11 @@
 
 set -euo pipefail
 
-source rapids-env-update
+rapids-configure-conda-channels
+
+source rapids-configure-sccache
+
+source rapids-date-string
 
 export CMAKE_GENERATOR=Ninja
 
From 64aa9410ec942312f924964647efc7e9e34b392d Mon Sep 17 00:00:00 2001
From: Mark Harris <783069+harrism@users.noreply.github.com>
Date: Tue, 16 Jan 2024 07:49:27 +1100
Subject: [PATCH 18/23] Require explicit pool size in `pool_memory_resource`
 and move some things out of detail namespace (#1417)

Fixes #1416.

- ~Deprecates existing ctors of `pool_memory_resource` that provide an
  optional parameter for the initial pool size.~
- Adds new ctors that require an explicit initial pool size.
- We don't yet deprecate anything in this PR because that would break builds
  of some RAPIDS libraries. We will follow up with PRs to cuDF, cuGraph and
  anything else needed to remove deprecated usages after this PR is merged.
- Adds a new utility `percent_of_free_device_memory` that calculates the
  specified percent of free memory on the current CUDA device. This is now
  used in tests to provide an explicit pool size and can be used to produce
  the previous behavior of `pool_memory_resource` for consumers of the
  library.
- Moves `available_device_memory` from a detail header to `cuda_device.hpp`
  so it is now publicly usable, along with the above utility.
- Temporarily adds `detail::available_device_memory` as an alias of the above
  in order to keep cuDF and cuGraph building until we can update them.
- Duplicates the alignment functions in `rmm::detail` that are commonly used
  externally into the public `rmm` namespace. The detail versions will be
  removed after cuDF and cuGraph are updated to not use them.
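As a minimal sketch of the resulting API (an editorial illustration, not part of the patch; it assumes only the headers added or modified in the diff below), constructing a pool with an explicit initial size looks like this:

```c++
#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr{};

  // The initial pool size is now an explicit argument. This helper computes
  // 50% of the currently free device memory, aligned down to a 256-byte
  // multiple, which reproduces the previous implicit default.
  auto const initial_size = rmm::percent_of_free_device_memory(50);

  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr, initial_size};

  // available_device_memory() is now public: returns {free, total} in bytes
  // for the current device.
  [[maybe_unused]] auto const [free, total] = rmm::available_device_memory();

  void* ptr = pool_mr.allocate(1024);
  pool_mr.deallocate(ptr, 1024);
}
```

Making the size explicit turns the old "half of free device memory" behavior into a caller decision rather than a hidden library default.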
Authors: - Mark Harris (https://github.com/harrism) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Michael Schellenberger Costa (https://github.com/miscco) - Lawrence Mitchell (https://github.com/wence-) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/rmm/pull/1417 --- .clang-tidy | 4 +- README.md | 10 +- .../device_uvector/device_uvector_bench.cu | 9 +- .../multi_stream_allocations_bench.cu | 6 +- .../random_allocations/random_allocations.cpp | 8 +- benchmarks/replay/replay.cpp | 4 +- doxygen/Doxyfile | 2 +- include/doxygen_groups.h | 3 +- include/rmm/aligned.hpp | 119 +++++++++++++++++ include/rmm/cuda_device.hpp | 46 ++++++- include/rmm/detail/aligned.hpp | 12 +- include/rmm/detail/cuda_util.hpp | 31 ----- .../mr/device/aligned_resource_adaptor.hpp | 18 +-- .../rmm/mr/device/arena_memory_resource.hpp | 7 +- .../rmm/mr/device/binning_memory_resource.hpp | 7 +- .../mr/device/cuda_async_memory_resource.hpp | 5 +- .../cuda_async_view_memory_resource.hpp | 3 +- include/rmm/mr/device/detail/arena.hpp | 12 +- .../detail/stream_ordered_memory_resource.hpp | 8 +- .../rmm/mr/device/device_memory_resource.hpp | 8 +- .../mr/device/fixed_size_memory_resource.hpp | 9 +- .../mr/device/limiting_resource_adaptor.hpp | 10 +- include/rmm/mr/device/per_device_resource.hpp | 12 +- .../rmm/mr/device/pool_memory_resource.hpp | 121 +++++++++++------- include/rmm/mr/host/new_delete_resource.hpp | 12 +- .../rmm/mr/host/pinned_memory_resource.hpp | 10 +- python/docs/conf.py | 8 +- python/docs/librmm_docs/deprecated.rst | 5 + python/docs/librmm_docs/index.rst | 2 + python/docs/librmm_docs/utilities.rst | 5 + python/rmm/_lib/memory_resource.pyx | 11 +- tests/container_multidevice_tests.cu | 4 +- tests/mr/device/aligned_mr_tests.cpp | 7 +- tests/mr/device/arena_mr_tests.cpp | 16 +-- tests/mr/device/failure_callback_mr_tests.cpp | 9 +- tests/mr/device/mr_ref_test.hpp | 20 +-- tests/mr/device/mr_test.hpp | 16 ++- tests/mr/device/pool_mr_tests.cpp | 22 ++-- tests/mr/host/mr_ref_tests.cpp | 6 +- tests/mr/host/mr_tests.cpp | 6 +- tests/mr/host/pinned_pool_mr_tests.cpp | 5 +- 41 files changed, 420 insertions(+), 218 deletions(-) create mode 100644 include/rmm/aligned.hpp delete mode 100644 include/rmm/detail/cuda_util.hpp create mode 100644 python/docs/librmm_docs/deprecated.rst create mode 100644 python/docs/librmm_docs/utilities.rst diff --git a/.clang-tidy b/.clang-tidy index 9b3f844c9..70a0bea16 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -62,8 +62,8 @@ CheckOptions: value: 'alignment' - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues value: '1' - - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: '1' + - key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues + value: "0;1;2;3;4;50;100" - key: cppcoreguidelines-avoid-do-while.IgnoreMacros value: 'true' ... 
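The alignment helpers promoted out of the detail namespace are defined in the new `include/rmm/aligned.hpp` later in this diff; the following minimal sketch (an illustration, not part of the patch) shows how they behave:

```c++
#include <rmm/aligned.hpp>

#include <cassert>

int main()
{
  // align_up and align_down require a power-of-two alignment;
  // rmm::CUDA_ALLOCATION_ALIGNMENT is 256 bytes.
  assert(rmm::align_up(1000, rmm::CUDA_ALLOCATION_ALIGNMENT) == 1024);
  assert(rmm::align_down(1000, rmm::CUDA_ALLOCATION_ALIGNMENT) == 768);
  assert(rmm::is_aligned(1024, rmm::CUDA_ALLOCATION_ALIGNMENT));

  int value{};
  // is_pointer_aligned defaults to the 256-byte CUDA allocation alignment;
  // here we check the weaker natural alignment of an int instead.
  assert(rmm::is_pointer_aligned(&value, alignof(int)));
}
```

The `rmm::detail` copies of these functions remain in place for now so that cuDF and cuGraph keep building, as noted in the commit message above.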
diff --git a/README.md b/README.md index e033ef56f..a1b85d33c 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,9 @@ Accessing and modifying the default resource is done through two functions: ```c++ rmm::mr::cuda_memory_resource cuda_mr; // Construct a resource that uses a coalescing best-fit pool allocator -rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; +// With the pool initially half of available device memory +auto initial_size = rmm::percent_of_free_device_memory(50); +rmm::mr::pool_memory_resource pool_mr{&cuda_mr, initial_size}; rmm::mr::set_current_device_resource(&pool_mr); // Updates the current device resource pointer to `pool_mr` rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `pool_mr` ``` @@ -351,11 +353,13 @@ per-device resources. Here is an example loop that creates `unique_ptr`s to `poo objects for each device and sets them as the per-device resource for that device. ```c++ -std::vector> per_device_pools; +using pool_mr = rmm::mr::pool_memory_resource; +std::vector> per_device_pools; for(int i = 0; i < N; ++i) { cudaSetDevice(i); // set device i before creating MR // Use a vector of unique_ptr to maintain the lifetime of the MRs - per_device_pools.push_back(std::make_unique()); + // Note: for brevity, omitting creation of upstream and computing initial_size + per_device_pools.push_back(std::make_unique(upstream, initial_size)); // Set the per-device resource for device i set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); } diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 454db81a5..8b7f9a5ba 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include "../synchronization/synchronization.hpp" +#include #include #include #include @@ -38,7 +39,8 @@ void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) @@ -59,7 +61,8 @@ BENCHMARK(BM_UvectorSizeConstruction) void BM_ThrustVectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 5ed1b31f9..4943e507f 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -100,7 +101,8 @@ inline auto make_cuda_async() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 470442830..2856cd323 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -165,12 +166,13 @@ inline auto make_cuda_async() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() { - auto free = rmm::detail::available_device_memory().first; + auto free = rmm::available_device_memory().first; constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead. return rmm::mr::make_owning_wrapper(make_cuda(), free - reserve); } diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 320811875..253708ace 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,7 @@ inline auto make_pool(std::size_t simulated_size) return rmm::mr::make_owning_wrapper( make_simulated(simulated_size), simulated_size, simulated_size); } - return rmm::mr::make_owning_wrapper(make_cuda()); + return rmm::mr::make_owning_wrapper(make_cuda(), 0); } inline auto make_arena(std::size_t simulated_size) diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 149603f59..513f15875 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -504,7 +504,7 @@ EXTRACT_PACKAGE = NO # included in the documentation. # The default value is: NO. -EXTRACT_STATIC = NO +EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, diff --git a/include/doxygen_groups.h b/include/doxygen_groups.h index be5eaf17f..70ec0cd68 100644 --- a/include/doxygen_groups.h +++ b/include/doxygen_groups.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,4 +41,5 @@ * @defgroup errors Errors * @defgroup logging Logging * @defgroup thrust_integrations Thrust Integrations + * @defgroup utilities Utilities */ diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp new file mode 100644 index 000000000..7a0feaabf --- /dev/null +++ b/include/rmm/aligned.hpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace rmm { + +/** + * @addtogroup utilities + * @{ + * @file + */ + +/** + * @brief Default alignment used for host memory allocated by RMM. + * + */ +static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; + +/** + * @brief Default alignment used for CUDA memory allocation. + * + */ +static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; + +/** + * @brief Returns whether or not `value` is a power of 2. + * + * @param[in] value to check. + * + * @return Whether the input a power of two with non-negative exponent + */ +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } + +/** + * @brief Returns whether or not `alignment` is a valid memory alignment. + * + * @param[in] alignment to check + * + * @return Whether the alignment is valid + */ +constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } + +/** + * @brief Align up to nearest multiple of specified power of 2 + * + * @param[in] value value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return (value + (alignment - 1)) & ~(alignment - 1); +} + +/** + * @brief Align down to the nearest multiple of specified power of 2 + * + * @param[in] value value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value & ~(alignment - 1); +} + +/** + * @brief Checks whether a value is aligned to a multiple of a specified power of 2 + * + * @param[in] value value to check for alignment + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return true if aligned + */ +constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value == align_down(value, alignment); +} + +/** + * @brief Checks whether the provided pointer is aligned to a specified @p alignment + * + * @param[in] ptr pointer to check for alignment + * @param[in] alignment required alignment in bytes, must be a power of 2 + * + * @return true if the pointer is aligned + */ +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return is_aligned(reinterpret_cast(ptr), alignment); +} + +/** @} */ // end of group + +} // namespace rmm diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 054bbb920..565d86926 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -102,6 +103,49 @@ inline int get_num_cuda_devices() return num_dev; } +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +inline std::pair available_device_memory() +{ + std::size_t free{}; + std::size_t total{}; + RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); + return {free, total}; +} + +namespace detail { + +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @deprecated Use rmm::available_device_memory() instead. + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +//[[deprecated("Use `rmm::available_device_memory` instead.")]] // +const auto available_device_memory = rmm::available_device_memory; + +} // namespace detail + +/** + * @brief Returns the approximate specified percent of available device memory on the current CUDA + * device, aligned (down) to the nearest CUDA allocation size. + * + * @param percent The percent of free memory to return. + * + * @return The recommended initial device memory pool size in bytes. + */ +inline std::size_t percent_of_free_device_memory(int percent) +{ + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + auto fraction = static_cast(percent) / 100.0; + return rmm::align_down(static_cast(static_cast(free) * fraction), + rmm::CUDA_ALLOCATION_ALIGNMENT); +} + /** * @brief RAII class that sets the current CUDA device to the specified device on construction * and restores the previous device on destruction. diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 321be53b5..54d287bfb 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. 
@@ -51,7 +51,7 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al /** * @brief Align up to nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -65,7 +65,7 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep /** * @brief Align down to the nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -79,7 +79,7 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * - * @param[in] v value to check for alignment + * @param[in] value value to check for alignment * @param[in] alignment amount, in bytes, must be a power of 2 * * @return true if aligned @@ -93,7 +93,7 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); + return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); } /** diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp deleted file mode 100644 index 613b8d156..000000000 --- a/include/rmm/detail/cuda_util.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -namespace rmm::detail { - -/// Gets the available and total device memory in bytes for the current device -inline std::pair available_device_memory() -{ - std::size_t free{}; - std::size_t total{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); - return {free, total}; -} - -} // namespace rmm::detail diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 05e9915cc..be7c3036c 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include @@ -65,12 +65,12 @@ class aligned_resource_adaptor final : public device_memory_resource { * are aligned. 
*/ explicit aligned_resource_adaptor(Upstream* upstream, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), + RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -127,14 +127,14 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); } auto const size = upstream_allocation_size(bytes); void* pointer = upstream_->allocate(size, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, alignment_); + auto const aligned_address = rmm::align_up(address, alignment_); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) void* aligned_pointer = reinterpret_cast(aligned_address); if (pointer != aligned_pointer) { @@ -153,7 +153,7 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { upstream_->deallocate(ptr, bytes, stream); } else { { @@ -208,8 +208,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, alignment_); - return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 929b8454f..1b821b440 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -145,7 +146,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); @@ -195,7 +196,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index c2e1621a6..2a9975b18 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -138,8 +138,7 @@ class binning_memory_resource final : public device_memory_resource { */ void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) { - allocation_size = - rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + allocation_size = rmm::align_up(allocation_size, rmm::CUDA_ALLOCATION_ALIGNMENT); if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index de31c7dc4..f8295c6f6 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -120,7 +119,7 @@ class cuda_async_memory_resource final : public device_memory_resource { pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); } - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute uint64_t threshold = release_threshold.value_or(total); diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 825fcab1e..562944669 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index d8da58493..c7965ca34 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ #pragma once +#include +#include #include -#include -#include #include #include #include @@ -508,8 +508,8 @@ class global_arena final { : upstream_mr_{upstream_mr} { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); - auto const size = rmm::detail::align_down(arena_size.value_or(default_size()), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const size = + rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, "Arena size smaller than minimum superblock size."); initialize(size); @@ -692,7 +692,7 @@ class global_arena final { */ constexpr std::size_t default_size() const { - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); return free / 2; } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index a57bf1c6d..1d6829cb5 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -207,7 +207,7 @@ class stream_ordered_memory_resource : public crtp, public device_ auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), "Maximum allocation size exceeded", rmm::out_of_memory); @@ -241,7 +241,7 @@ class stream_ordered_memory_resource : public crtp, public device_ lock_guard lock(mtx_); auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); auto const block = this->underlying().free_block(ptr, size); // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 63e5f39a4..e3014b6c3 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,10 +78,12 @@ namespace rmm::mr { * device. 
* * @code{.cpp} - * std::vector> per_device_pools; + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; * for(int i = 0; i < N; ++i) { * cudaSetDevice(i); - * per_device_pools.push_back(std::make_unique()); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 01fb8a6bc..91cc95c53 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -77,7 +77,7 @@ class fixed_size_memory_resource std::size_t block_size = default_block_size, std::size_t blocks_to_preallocate = default_blocks_to_preallocate) : upstream_mr_{upstream_mr}, - block_size_{rmm::detail::align_up(block_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)}, + block_size_{rmm::align_up(block_size, rmm::CUDA_ALLOCATION_ALIGNMENT)}, upstream_chunk_size_{block_size * blocks_to_preallocate} { // allocate initial blocks and insert into free list @@ -207,8 +207,7 @@ class fixed_size_memory_resource { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class - RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= - block_size_); + RMM_LOGGING_ASSERT(rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); return block_type{ptr}; } diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 6573956d0..2123c3cac 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -54,7 +54,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(Upstream* upstream, std::size_t allocation_limit, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), alignment_(alignment), @@ -134,7 +134,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::detail::align_up(bytes, alignment_); + auto const proposed_size = rmm::align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { @@ -158,7 +158,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + std::size_t allocated_size = rmm::align_up(bytes, alignment_); upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 139389f0c..a56a784a1 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,16 @@ * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode + * @code{.cpp} + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; + * for(int i = 0; i < N; ++i) { + * cudaSetDevice(i); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); + * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); + * } + * @endcode */ namespace rmm::mr { diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 20b250524..c0317cf57 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,8 @@ */ #pragma once +#include #include -#include -#include #include #include #include @@ -110,10 +109,37 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory + * pool using `upstream_mr`. + * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. 
+ * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + //[[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream* upstream_mr, + thrust::optional initial_pool_size = thrust::nullopt, + thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * * @throws rmm::logic_error if `upstream_mr == nullptr` * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. @@ -121,24 +147,46 @@ class pool_memory_resource final * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. */ - explicit pool_memory_resource(Upstream* upstream_mr, + template , int> = 0> + //[[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream2& upstream_mr, thrust::optional initial_pool_size = thrust::nullopt, thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource.
+ */ + explicit pool_memory_resource(Upstream* upstream_mr, + std::size_t initial_pool_size, + thrust::optional maximum_pool_size = thrust::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); return upstream_mr; }()} { - RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); - RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Maximum pool size required to be a multiple of 256 bytes"); initialize_pool(initial_pool_size, maximum_pool_size); @@ -149,21 +197,20 @@ class pool_memory_resource final * `upstream_mr`. * * @throws rmm::logic_error if `upstream_mr == nullptr` - * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a - * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. */ template , int> = 0> explicit pool_memory_resource(Upstream2& upstream_mr, - thrust::optional initial_pool_size = thrust::nullopt, + std::size_t initial_pool_size, thrust::optional maximum_pool_size = thrust::nullopt) : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size) { @@ -276,38 +323,22 @@ class pool_memory_resource final /** * @brief Allocate initial memory for the pool * - * If initial_size is unset, then queries the upstream memory resource for available memory if - * upstream supports `get_mem_info`, or queries the device (using CUDA API) for available memory - * if not. Then attempts to initialize to half the available memory. - * - * If initial_size is set, then tries to initialize the pool to that size. - * * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool + * + * @throws logic_error if @p initial_size is larger than @p maximum_size (if set). */ - // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - void initialize_pool(thrust::optional initial_size, - thrust::optional maximum_size) + void initialize_pool(std::size_t initial_size, thrust::optional maximum_size) { - auto const try_size = [&]() { - if (not initial_size.has_value()) { - auto const [free, total] = (get_upstream()->supports_get_mem_info()) - ? 
get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); - return rmm::detail::align_up(std::min(free, total / 2), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } - return initial_size.value(); - }(); - current_pool_size_ = 0; // try_to_expand will set this if it succeeds maximum_pool_size_ = maximum_size; - RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), - "Initial pool size exceeds the maximum pool size!"); + RMM_EXPECTS( + initial_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), + "Initial pool size exceeds the maximum pool size!"); - if (try_size > 0) { - auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + if (initial_size > 0) { + auto const block = try_to_expand(initial_size, initial_size, cuda_stream_legacy); this->insert_block(block, cuda_stream_legacy); } } @@ -346,9 +377,9 @@ class pool_memory_resource final { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); - using rmm::detail::align_up; - auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + using rmm::align_up; + auto const remaining = align_up(unaligned_remaining, rmm::CUDA_ALLOCATION_ALIGNMENT); + auto const aligned_size = align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0; } return std::max(size, pool_size()); @@ -416,7 +447,7 @@ class pool_memory_resource final RMM_LOGGING_ASSERT(iter != allocated_blocks_.end()); auto block = *iter; - RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment)); + RMM_LOGGING_ASSERT(block.size() == rmm::align_up(size, allocation_alignment)); allocated_blocks_.erase(iter); return block; diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 044f74063..4bb272df3 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include +#include #include #include @@ -58,12 +59,11 @@ class new_delete_resource final : public host_memory_resource { * @return Pointer to the newly allocated memory */ void* do_allocate(std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? 
alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); @@ -84,7 +84,7 @@ class new_delete_resource final : public host_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { rmm::detail::aligned_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index e49767faf..b5c273ef5 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -114,7 +115,7 @@ class pinned_memory_resource final : public host_memory_resource { */ void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, cuda_stream_view) { - do_deallocate(ptr, rmm::detail::align_up(bytes, alignment)); + do_deallocate(ptr, rmm::align_up(bytes, alignment)); } /** @@ -143,9 +144,8 @@ class pinned_memory_resource final : public host_memory_resource { if (0 == bytes) { return nullptr; } // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; diff --git a/python/docs/conf.py b/python/docs/conf.py index ba5aa6d20..b4c141eb4 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -248,14 +248,16 @@ def on_missing_reference(app, env, node, contnode): if match := re.search("(.*)<.*>", reftarget): reftarget = match.group(1) + # This is the document we're linking _from_, and hence where + # we should try and resolve the xref wrt. + refdoc = node.get("refdoc") # Try to find the target prefixed with e.g. namespaces in case that's # all that's missing. Include the empty prefix in case we're searching # for a stripped template. extra_prefixes = ["rmm::", "rmm::mr::", "mr::", ""] - for (name, dispname, type, docname, anchor, priority) in env.domains[ + for (name, dispname, typ, docname, anchor, priority) in env.domains[ "cpp" ].get_objects(): - for prefix in extra_prefixes: if ( name == f"{prefix}{reftarget}" @@ -263,7 +265,7 @@ def on_missing_reference(app, env, node, contnode): ): return env.domains["cpp"].resolve_xref( env, - docname, + refdoc, app.builder, node["reftype"], name, diff --git a/python/docs/librmm_docs/deprecated.rst b/python/docs/librmm_docs/deprecated.rst new file mode 100644 index 000000000..b5ed90caa --- /dev/null +++ b/python/docs/librmm_docs/deprecated.rst @@ -0,0 +1,5 @@ +Deprecated functionality +======================== + +.. 
doxygenpage:: deprecated + :content-only: diff --git a/python/docs/librmm_docs/index.rst b/python/docs/librmm_docs/index.rst index 6afd94d2e..2b61deb9f 100644 --- a/python/docs/librmm_docs/index.rst +++ b/python/docs/librmm_docs/index.rst @@ -17,6 +17,8 @@ librmm Documentation cuda_streams errors logging + utilities + deprecated .. doxygennamespace:: rmm diff --git a/python/docs/librmm_docs/utilities.rst b/python/docs/librmm_docs/utilities.rst new file mode 100644 index 000000000..25b455746 --- /dev/null +++ b/python/docs/librmm_docs/utilities.rst @@ -0,0 +1,5 @@ +Utilities +============ + +.. doxygengroup:: utilities + :members: diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index ce7f45e19..690e2e338 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -120,12 +120,15 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ win32 win32_kmt +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass pool_memory_resource[Upstream](device_memory_resource): pool_memory_resource( Upstream* upstream_mr, - optional[size_t] initial_pool_size, + size_t initial_pool_size, optional[size_t] maximum_pool_size) except + size_t pool_size() @@ -369,12 +372,12 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): initial_pool_size=None, maximum_pool_size=None ): - cdef optional[size_t] c_initial_pool_size + cdef size_t c_initial_pool_size cdef optional[size_t] c_maximum_pool_size c_initial_pool_size = ( - optional[size_t]() if + percent_of_free_device_memory(50) if initial_pool_size is None - else make_optional[size_t](initial_pool_size) + else initial_pool_size ) c_maximum_pool_size = ( optional[size_t]() if diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index 9de9ddf40..e58ba53a2 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,12 +15,12 @@ */ #include "device_check_resource_adaptor.hpp" -#include "rmm/mr/device/per_device_resource.hpp" #include #include #include #include +#include #include diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index dfcdfa72f..5fbb4b8f1 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,8 @@ */ #include "../../mock_resource.hpp" -#include + +#include #include #include #include @@ -223,7 +224,7 @@ TEST(AlignedTest, AlignRealPointer) auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; void* alloc = mr.allocate(threshold); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 7525cac9f..1068e0cf0 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,10 @@ */ #include "../../byte_literals.hpp" + +#include +#include #include -#include -#include #include #include #include @@ -487,10 +488,9 @@ TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - auto const free = rmm::detail::available_device_memory().first; - auto const ninety_percent = - rmm::detail::align_up(static_cast(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const free = rmm::available_device_memory().first; + auto const ninety_percent = rmm::align_up( + static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); }()); } @@ -501,7 +501,7 @@ TEST_F(ArenaTest, SmallMediumLarge) // NOLINT arena_mr mr(rmm::mr::get_current_device_resource()); auto* small = mr.allocate(256); auto* medium = mr.allocate(64_MiB); - auto const free = rmm::detail::available_device_memory().first; + auto const free = rmm::available_device_memory().first; auto* large = mr.allocate(free / 3); mr.deallocate(small, 256); mr.deallocate(medium, 64_MiB); diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp index bb5484c69..79acd5c7e 100644 --- a/tests/mr/device/failure_callback_mr_tests.cpp +++ b/tests/mr/device/failure_callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,16 +15,17 @@ */ #include "../../byte_literals.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/mr/device/device_memory_resource.hpp" -#include +#include #include #include +#include #include #include +#include + namespace rmm::test { namespace { diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 804c710a5..25ff76891 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,9 +18,10 @@ #include "../../byte_literals.hpp" +#include +#include #include #include -#include #include #include #include @@ -78,7 +79,7 @@ inline void test_allocate(resource_ref ref, std::size_t bytes) try { void* ptr = ref.allocate(bytes); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate(ptr, bytes); } catch (rmm::out_of_memory const& e) { @@ -94,7 +95,7 @@ inline void test_allocate_async(async_resource_ref ref, void* ptr = ref.allocate_async(bytes, stream); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate_async(ptr, bytes, stream); if (not stream.is_default()) { stream.synchronize(); } @@ -202,7 +203,7 @@ inline void test_random_allocations(resource_ref ref, alloc.size = distribution(generator); EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) { @@ -228,7 +229,7 @@ inline void test_random_async_allocations(async_resource_ref ref, EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) { @@ -269,7 +270,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -316,7 +317,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -379,7 +380,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 03f880e72..ef4b4bc80 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,9 +18,10 @@ #include "../../byte_literals.hpp" +#include +#include #include #include -#include #include #include #include @@ -74,7 +75,7 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -86,7 +87,7 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } @@ -154,7 +155,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -196,7 +197,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -257,7 +258,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 2f32889d0..a2793386f 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,8 @@ * limitations under the License. 
*/ +#include #include -#include -#include #include #include #include @@ -39,7 +38,7 @@ using limiting_mr = rmm::mr::limiting_resource_adaptor(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = rmm::percent_of_free_device_memory(90); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); @@ -83,9 +80,8 @@ TEST(PoolTest, AllocateNinetyPercent) TEST(PoolTest, TwoLargeBuffers) { auto two_large = []() { - auto const [free, total] = rmm::detail::available_device_memory(); - (void)total; - pool_mr mr{rmm::mr::get_current_device_resource()}; + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); @@ -158,8 +154,8 @@ TEST(PoolTest, NonAlignedPoolSize) TEST(PoolTest, UpstreamDoesntSupportMemInfo) { cuda_mr cuda; - pool_mr mr1(&cuda); - pool_mr mr2(&mr1); + pool_mr mr1(&cuda, 0); + pool_mr mr2(&mr1, 0); auto* ptr = mr2.allocate(1024); mr2.deallocate(ptr, 1024); } diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp index 6563eb635..416641f18 100644 --- a/tests/mr/host/mr_ref_tests.cpp +++ b/tests/mr/host/mr_ref_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 678d6aeb8..e0078c920 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index dcdae37fa..d10b85e72 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -33,7 +32,7 @@ using pool_mr = rmm::mr::pool_memory_resource; TEST(PinnedPoolTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { pool_mr mr{nullptr}; }; + auto construct_nullptr = []() { pool_mr mr{nullptr, 1024}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } From bb8fdf1eaaf1b3245eb2d7e16da745a715ce04f8 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Thu, 18 Jan 2024 06:52:46 +1100 Subject: [PATCH 19/23] Deprecate detail::available_device_memory, most detail/aligned.hpp utilities, and optional pool_memory_resource initial size (#1424) Follow-on to #1417, this PR deprecates the following: - `rmm::detail::available_device_memory` in favor of rmm::available_device_memory - `rmm::detail::is_aligned`, `rmm::detail::align_up` and related alignment utility functions in favor of the `rmm::` top level namespace versions. - The `rmm::pool_memory_resource` constructors that take an optional initial size parameter. Should be merged after the following: - https://github.com/rapidsai/cugraph/pull/4086 - https://github.com/rapidsai/cudf/pull/14741 - https://github.com/rapidsai/raft/pull/2088 Authors: - Mark Harris (https://github.com/harrism) Approvers: - Michael Schellenberger Costa (https://github.com/miscco) - Rong Ou (https://github.com/rongou) URL: https://github.com/rapidsai/rmm/pull/1424 --- include/rmm/aligned.hpp | 19 +++++--- include/rmm/cuda_device.hpp | 2 +- include/rmm/detail/aligned.hpp | 45 ++++++++++++------- .../rmm/mr/device/device_memory_resource.hpp | 8 ++-- .../rmm/mr/device/pool_memory_resource.hpp | 4 +- 5 files changed, 48 insertions(+), 30 deletions(-) diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index 7a0feaabf..bd39d7949 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -47,7 +47,10 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * * @return Whether the input a power of two with non-negative exponent */ -constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } +[[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept +{ + return (value != 0U) && ((value & (value - 1)) == 0U); +} /** * @brief Returns whether or not `alignment` is a valid memory alignment. 
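For reference, migrating off the deprecated `detail` utilities is mechanical. A minimal sketch against the public `rmm/aligned.hpp` and `rmm/cuda_device.hpp` headers, mirroring the test changes in this patch (the helper names here are illustrative, not part of the diff itself):

```cpp
#include <rmm/aligned.hpp>      // rmm::align_up, rmm::is_pointer_aligned, rmm::CUDA_ALLOCATION_ALIGNMENT
#include <rmm/cuda_device.hpp>  // rmm::available_device_memory

#include <cstddef>

// Before: rmm::detail::available_device_memory() and rmm::detail::align_up(...).
// After: the same calls in the top-level rmm:: namespace.
inline std::size_t aligned_ninety_percent_of_free()
{
  auto const free_bytes = rmm::available_device_memory().first;
  return rmm::align_up(static_cast<std::size_t>(static_cast<double>(free_bytes) * 0.9),
                       rmm::CUDA_ALLOCATION_ALIGNMENT);
}

// Before: rmm::detail::is_pointer_aligned(ptr, alignment).
inline bool aligned_ok(void* ptr)
{
  return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT);
}
```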
@@ -56,7 +59,10 @@ constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (v * * @return Whether the alignment is valid */ -constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } +[[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept +{ + return is_pow2(alignment); +} /** * @brief Align up to nearest multiple of specified power of 2 @@ -66,7 +72,7 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept +[[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return (value + (alignment - 1)) & ~(alignment - 1); @@ -80,7 +86,7 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept +[[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return value & ~(alignment - 1); @@ -94,7 +100,7 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc * * @return true if aligned */ -constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept +[[nodiscard]] constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept { assert(is_supported_alignment(alignment)); return value == align_down(value, alignment); @@ -108,7 +114,8 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept * * @return true if the pointer is aligned */ -inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +[[nodiscard]] inline bool is_pointer_aligned( + void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) noexcept { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) return is_aligned(reinterpret_cast(ptr), alignment); diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 565d86926..02017c3da 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -125,7 +125,7 @@ namespace detail { * * @return The available and total device memory in bytes for the current device as a std::pair. */ -//[[deprecated("Use `rmm::available_device_memory` instead.")]] // +[[deprecated("Use `rmm::available_device_memory` instead.")]] // const auto available_device_memory = rmm::available_device_memory; } // namespace detail diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 54d287bfb..7e7b42a18 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -28,25 +30,34 @@ namespace rmm::detail { * @brief Default alignment used for host memory allocated by RMM. * */ -static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; +[[deprecated("Use rmm::RMM_DEFAULT_HOST_ALIGNMENT instead.")]] static constexpr std::size_t + RMM_DEFAULT_HOST_ALIGNMENT{rmm::RMM_DEFAULT_HOST_ALIGNMENT}; /** * @brief Default alignment used for CUDA memory allocation. 
* */ -static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; +[[deprecated("Use rmm::CUDA_ALLOCATION_ALIGNMENT instead.")]] static constexpr std::size_t + CUDA_ALLOCATION_ALIGNMENT{rmm::CUDA_ALLOCATION_ALIGNMENT}; /** * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } +[[deprecated("Use rmm::is_pow2 instead.")]] constexpr bool is_pow2(std::size_t value) noexcept +{ + return rmm::is_pow2(value); +} /** * @brief Returns whether or not `alignment` is a valid memory alignment. * */ -constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } +[[deprecated("Use rmm::is_supported_alignment instead.")]] constexpr bool is_supported_alignment( + std::size_t alignment) noexcept +{ + return rmm::is_pow2(alignment); +} /** * @brief Align up to nearest multiple of specified power of 2 @@ -56,10 +67,10 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept +[[deprecated("Use rmm::align_up instead.")]] constexpr std::size_t align_up( + std::size_t value, std::size_t alignment) noexcept { - assert(is_supported_alignment(alignment)); - return (value + (alignment - 1)) & ~(alignment - 1); + return rmm::align_up(value, alignment); } /** @@ -70,10 +81,10 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep * * @return Return the aligned value, as one would expect */ -constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept +[[deprecated("Use rmm::align_down instead.")]] constexpr std::size_t align_down( + std::size_t value, std::size_t alignment) noexcept { - assert(is_supported_alignment(alignment)); - return value & ~(alignment - 1); + return rmm::align_down(value, alignment); } /** @@ -84,16 +95,16 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc * * @return true if aligned */ -constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept +[[deprecated("Use rmm::is_aligned instead.")]] constexpr bool is_aligned( + std::size_t value, std::size_t alignment) noexcept { - assert(is_supported_alignment(alignment)); - return value == align_down(value, alignment); + return rmm::is_aligned(value, alignment); } -inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +[[deprecated("Use rmm::is_pointer_aligned instead.")]] inline bool is_pointer_aligned( + void* ptr, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); + return rmm::is_pointer_aligned(ptr, alignment); } /** @@ -126,7 +137,7 @@ inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATIO template void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) { - assert(is_pow2(alignment)); + assert(rmm::is_pow2(alignment)); // allocate memory for bytes, plus potential alignment correction, // plus store of the correction offset diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index e3014b6c3..55006f9b0 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -173,7 +173,7 @@ class 
device_memory_resource { */ void* allocate(std::size_t bytes, std::size_t alignment) { - return do_allocate(rmm::detail::align_up(bytes, alignment), cuda_stream_view{}); + return do_allocate(rmm::align_up(bytes, alignment), cuda_stream_view{}); } /** @@ -191,7 +191,7 @@ class device_memory_resource { */ void deallocate(void* ptr, std::size_t bytes, std::size_t alignment) { - do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), cuda_stream_view{}); + do_deallocate(ptr, rmm::align_up(bytes, alignment), cuda_stream_view{}); } /** @@ -209,7 +209,7 @@ class device_memory_resource { */ void* allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream) { - return do_allocate(rmm::detail::align_up(bytes, alignment), stream); + return do_allocate(rmm::align_up(bytes, alignment), stream); } /** @@ -248,7 +248,7 @@ class device_memory_resource { std::size_t alignment, cuda_stream_view stream) { - do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), stream); + do_deallocate(ptr, rmm::align_up(bytes, alignment), stream); } /** diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index c0317cf57..63239e750 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -126,7 +126,7 @@ class pool_memory_resource final * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all * of the available memory from the upstream resource. */ - //[[deprecated("Must specify initial_pool_size")]] // + [[deprecated("Must specify initial_pool_size")]] // explicit pool_memory_resource(Upstream* upstream_mr, thrust::optional<std::size_t> initial_pool_size = thrust::nullopt, thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt) @@ -153,7 +153,7 @@ */ template <typename Upstream2 = Upstream, cuda::std::enable_if_t<cuda::mr::async_resource<Upstream2>, int> = 0> - //[[deprecated("Must specify initial_pool_size")]] // + [[deprecated("Must specify initial_pool_size")]] // explicit pool_memory_resource(Upstream2& upstream_mr, thrust::optional<std::size_t> initial_pool_size = thrust::nullopt, thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt) From 12f8de30531fb2bd64184390c3a2c12b88a3d692 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Fri, 19 Jan 2024 10:39:58 +1100 Subject: [PATCH 20/23] Add a host-pinned memory resource that can be used as upstream for `pool_memory_resource`. (#1392) Depends on #1417 Adds a new `pinned_host_memory_resource` that implements the new `cuda::mr::memory_resource` and `cuda::mr::async_memory_resource` concepts, which makes it usable as an upstream MR for `rmm::mr::pool_memory_resource`. Also tests a pool made with this new MR as the upstream. Note that the tests explicitly set the initial and maximum pool sizes, as using the defaults does not currently work. See #1388.
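A usage sketch for the new resource (this mirrors the `make_host_pinned_pool` test helper added below; the 2 GiB initial / 8 GiB maximum pool sizes are simply the values used in the tests):

```cpp
#include <rmm/mr/device/owning_wrapper.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>

#include <cstddef>
#include <memory>

int main()
{
  // A pool of pinned host memory. Initial and maximum sizes are given
  // explicitly because the defaults do not currently work (see #1388).
  auto pinned = std::make_shared<rmm::mr::pinned_host_memory_resource>();
  auto pool   = rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
    pinned, std::size_t{2} << 30, std::size_t{8} << 30);

  void* ptr = pool->allocate(1024);  // pinned, device-accessible host memory
  pool->deallocate(ptr, 1024);
  return 0;
}
```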
Closes #618 Authors: - Mark Harris (https://github.com/harrism) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Michael Schellenberger Costa (https://github.com/miscco) - Alessandro Bellina (https://github.com/abellina) - Lawrence Mitchell (https://github.com/wence-) - Jake Hemstad (https://github.com/jrhemstad) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1392 --- include/rmm/aligned.hpp | 10 +- include/rmm/detail/aligned.hpp | 55 ++--- include/rmm/mr/host/new_delete_resource.hpp | 4 +- .../rmm/mr/host/pinned_memory_resource.hpp | 4 +- .../rmm/mr/pinned_host_memory_resource.hpp | 222 ++++++++++++++++++ tests/mr/device/mr_ref_test.hpp | 30 +-- tests/mr/device/mr_test.hpp | 35 ++- tests/mr/device/mr_tests.cpp | 2 + tests/mr/device/test_utils.hpp | 50 ++++ 9 files changed, 336 insertions(+), 76 deletions(-) create mode 100644 include/rmm/mr/pinned_host_memory_resource.hpp create mode 100644 tests/mr/device/test_utils.hpp diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp index bd39d7949..6e9970ab8 100644 --- a/include/rmm/aligned.hpp +++ b/include/rmm/aligned.hpp @@ -43,9 +43,9 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; /** * @brief Returns whether or not `value` is a power of 2. * - * @param[in] value to check. + * @param[in] value value to check. * - * @return Whether the input a power of two with non-negative exponent + * @return True if the input is a power of two with non-negative integer exponent, false otherwise. */ [[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept { @@ -57,7 +57,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * * @param[in] alignment to check * - * @return Whether the alignment is valid + * @return True if the alignment is valid, false otherwise. */ [[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept { @@ -70,7 +70,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * - * @return Return the aligned value, as one would expect + * @return the aligned value */ [[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept { @@ -84,7 +84,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * - * @return Return the aligned value, as one would expect + * @return the aligned value */ [[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept { diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 7e7b42a18..eb31658e9 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -108,36 +108,35 @@ namespace rmm::detail { } /** - * @brief Allocates sufficient memory to satisfy the requested size `bytes` with + * @brief Allocates sufficient host-accessible memory to satisfy the requested size `bytes` with * alignment `alignment` using the unary callable `alloc` to allocate memory. * - * Given a pointer `p` to an allocation of size `n` returned from the unary - * callable `alloc`, the pointer `q` returned from `aligned_alloc` points to a - * location within the `n` bytes with sufficient space for `bytes` that - * satisfies `alignment`. 
+ * Given a pointer `p` to an allocation of size `n` returned from the unary callable `alloc`, the + * pointer `q` returned from `aligned_alloc` points to a location within the `n` bytes with + * sufficient space for `bytes` that satisfies `alignment`. * - * In order to retrieve the original allocation pointer `p`, the offset - * between `p` and `q` is stored at `q - sizeof(std::ptrdiff_t)`. + * In order to retrieve the original allocation pointer `p`, the offset between `p` and `q` is + * stored at `q - sizeof(std::ptrdiff_t)`. * - * Allocations returned from `aligned_allocate` *MUST* be freed by calling - * `aligned_deallocate` with the same arguments for `bytes` and `alignment` with - * a compatible unary `dealloc` callable capable of freeing the memory returned - * from `alloc`. + * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling + * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible + * unary `dealloc` callable capable of freeing the memory returned from `alloc`. * * If `alignment` is not a power of 2, behavior is undefined. + * If `Alloc` does not allocate host-accessible memory, behavior is undefined. * * @param bytes The desired size of the allocation * @param alignment Desired alignment of allocation * @param alloc Unary callable given a size `n` will allocate at least `n` bytes - * of host memory. - * @tparam Alloc a unary callable type that allocates memory. + * of host-accessible memory. + * @tparam Alloc a unary callable type that allocates host-accessible memory. * @return void* Pointer into allocation of at least `bytes` with desired * `alignment`. */ template <typename Alloc> -void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) +void* aligned_host_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) { - assert(rmm::is_pow2(alignment)); + assert(rmm::is_supported_alignment(alignment)); // allocate memory for bytes, plus potential alignment correction, // plus store of the correction offset @@ -163,25 +162,27 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc) } /** - * @brief Frees an allocation returned from `aligned_allocate`. + * @brief Frees an allocation of host-accessible memory returned from `aligned_host_allocate`. * - * Allocations returned from `aligned_allocate` *MUST* be freed by calling - * `aligned_deallocate` with the same arguments for `bytes` and `alignment` - * with a compatible unary `dealloc` callable capable of freeing the memory - * returned from `alloc`. + * Allocations returned from `aligned_host_allocate` *MUST* be freed by calling + * `aligned_host_deallocate` with the same arguments for `bytes` and `alignment` with a compatible + * unary `dealloc` callable capable of freeing the memory returned from `alloc`. * * @param p The aligned pointer to deallocate - * @param bytes The number of bytes requested from `aligned_allocate` - * @param alignment The alignment required from `aligned_allocate` - * @param dealloc A unary callable capable of freeing memory returned from - * `alloc` in `aligned_allocate`. - * @tparam Dealloc A unary callable type that deallocates memory. + * @param bytes The number of bytes requested from `aligned_host_allocate` + * @param alignment The alignment required from `aligned_host_allocate` + * @param dealloc A unary callable capable of freeing host-accessible memory returned from `alloc` + * in `aligned_host_allocate`. + * @tparam Dealloc A unary callable type that deallocates host-accessible memory.
*/ template // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc) +void aligned_host_deallocate(void* ptr, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment, + Dealloc dealloc) noexcept { - (void)alignment; + assert(rmm::is_supported_alignment(alignment)); // Get offset from the location immediately prior to the aligned pointer // NOLINTNEXTLINE diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 4bb272df3..ccb294d21 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -65,7 +65,7 @@ class new_delete_resource final : public host_memory_resource { alignment = (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; - return rmm::detail::aligned_allocate( + return rmm::detail::aligned_host_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } @@ -86,7 +86,7 @@ class new_delete_resource final : public host_memory_resource { std::size_t bytes, std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index b5c273ef5..cb8524999 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -147,7 +147,7 @@ class pinned_memory_resource final : public host_memory_resource { alignment = (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; - return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; auto status = cudaMallocHost(&ptr, size); if (cudaSuccess != status) { throw std::bad_alloc{}; } @@ -173,7 +173,7 @@ class pinned_memory_resource final : public host_memory_resource { std::size_t alignment = alignof(std::max_align_t)) override { if (nullptr == ptr) { return; } - rmm::detail::aligned_deallocate( + rmm::detail::aligned_host_deallocate( ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } }; diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp new file mode 100644 index 000000000..c51af4182 --- /dev/null +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +namespace rmm::mr { + +/** + * @brief Memory resource class for allocating pinned host memory. 
+ * + * This class uses CUDA's `cudaHostAlloc` to allocate pinned host memory. It implements the + * `cuda::mr::memory_resource` and `cuda::mr::device_memory_resource` concepts, and + * the `cuda::mr::host_accessible` and `cuda::mr::device_accessible` properties. + */ +class pinned_host_memory_resource { + public: + // Disable clang-tidy complaining about the easily swappable size and alignment parameters + // of allocate and deallocate + // NOLINTBEGIN(bugprone-easily-swappable-parameters) + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * reason. + * + * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. Default alignment is used if unspecified. + * + * @return Pointer to the newly allocated memory. + */ + static void* allocate(std::size_t bytes, + [[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) + { + // don't allocate anything if the user requested zero bytes + if (0 == bytes) { return nullptr; } + + return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { + void* ptr{nullptr}; + RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault)); + return ptr; + }); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param alignment Alignment in bytes. Default alignment is used if unspecified. + */ + static void deallocate(void* ptr, + std::size_t bytes, + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept + { + rmm::detail::aligned_host_deallocate( + ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); + } + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes. + * + * @note Stream argument is ignored and behavior is identical to allocate. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * error. + * + * @param bytes The size, in bytes, of the allocation. + * @param stream CUDA stream on which to perform the allocation (ignored). + * @return Pointer to the newly allocated memory. + */ + static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes); + } + + /** + * @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment. + * + * @note Stream argument is ignored and behavior is identical to allocate. + * + * @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a + * CUDA out of memory error. + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other + * error. + * + * @param bytes The size, in bytes, of the allocation. + * @param alignment Alignment in bytes. + * @param stream CUDA stream on which to perform the allocation (ignored). + * @return Pointer to the newly allocated memory.
+ */ + static void* allocate_async(std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) + { + return allocate(bytes, alignment); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes. + * + * @note Stream argument is ignored and behavior is identical to deallocate. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param stream CUDA stream on which to perform the deallocation (ignored). + */ + static void deallocate_async(void* ptr, + std::size_t bytes, + [[maybe_unused]] cuda::stream_ref stream) noexcept + { + return deallocate(ptr, bytes); + } + + /** + * @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p + * alignment bytes. + * + * @note Stream argument is ignored and behavior is identical to deallocate. + * + * @throws Nothing. + * + * @param ptr Pointer to be deallocated. + * @param bytes Size of the allocation. + * @param alignment Alignment in bytes. + * @param stream CUDA stream on which to perform the deallocation (ignored). + */ + static void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + [[maybe_unused]] cuda::stream_ref stream) noexcept + { + return deallocate(ptr, bytes, alignment); + } + // NOLINTEND(bugprone-easily-swappable-parameters) + + /** + * @briefreturn{true if the specified resource is the same type as this resource.} + */ + bool operator==(const pinned_host_memory_resource&) const { return true; } + + /** + * @briefreturn{true if the specified resource is not the same type as this resource, otherwise + * false.} + */ + bool operator!=(const pinned_host_memory_resource&) const { return false; } + + /** + * @brief Query whether the resource supports reporting free and available memory. + * + * @return false + */ + static bool supports_get_mem_info() { return false; } + + /** + * @brief Query the total amount of memory and free memory available for allocation by this + * resource. + * + * @throws nothing + * + * @return std::pair containing 0 for both total and free memory. 
+ */ + [[nodiscard]] static std::pair get_mem_info(cuda::stream_ref) noexcept + { + return {0, 0}; + } + + /** + * @brief Enables the `cuda::mr::device_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides device accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept + { + } + + /** + * @brief Enables the `cuda::mr::host_accessible` property + * + * This property declares that a `pinned_host_memory_resource` provides host accessible memory + */ + friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept + { + } +}; + +static_assert(cuda::mr::async_resource_with); +} // namespace rmm::mr diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 25ff76891..9826c10be 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "test_utils.hpp" #include #include @@ -35,8 +36,6 @@ #include -#include - #include #include @@ -50,17 +49,6 @@ using async_resource_ref = cuda::mr::async_resource_ref(index_distribution(generator) % active_allocations); active_allocations--; @@ -317,7 +305,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index ef4b4bc80..3808ec6f3 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -17,6 +17,7 @@ #pragma once #include "../../byte_literals.hpp" +#include "test_utils.hpp" #include #include @@ -32,11 +33,10 @@ #include #include #include +#include #include -#include - #include #include #include @@ -45,17 +45,6 @@ namespace rmm::test { -/** - * @brief Returns if a pointer points to a device memory or managed memory - * allocation. 
- */ -inline bool is_device_memory(void* ptr) -{ - cudaPointerAttributes attributes{}; - if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } - return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged); -} - enum size_in_bytes : size_t {}; constexpr auto default_num_allocations{100}; @@ -75,8 +64,8 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); - EXPECT_TRUE(is_device_memory(ptr)); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -87,8 +76,8 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); - EXPECT_TRUE(is_device_memory(ptr)); + EXPECT_TRUE(is_properly_aligned(ptr)); + EXPECT_TRUE(is_device_accessible_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } } @@ -155,7 +144,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(is_properly_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -197,7 +186,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(is_properly_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -246,6 +235,8 @@ struct mr_allocation_test : public mr_test {}; /// MR factory functions inline auto make_cuda() { return std::make_shared(); } +inline auto make_host_pinned() { return std::make_shared(); } + inline auto make_cuda_async() { if (rmm::detail::async_alloc::is_supported()) { @@ -262,6 +253,12 @@ inline auto make_pool() make_cuda(), rmm::percent_of_free_device_memory(50)); } +inline auto make_host_pinned_pool() +{ + return rmm::mr::make_owning_wrapper( + make_host_pinned(), 2_GiB, 8_GiB); +} + inline auto make_arena() { return rmm::mr::make_owning_wrapper(make_cuda()); diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp index f6141e90f..bf513adda 100644 --- a/tests/mr/device/mr_tests.cpp +++ b/tests/mr/device/mr_tests.cpp @@ -31,6 +31,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceTests, #endif mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, + mr_factory{"HostPinnedPool", &make_host_pinned_pool}, mr_factory{"Arena", &make_arena}, mr_factory{"Binning", &make_binning}, mr_factory{"Fixed_Size", &make_fixed_size}), @@ -45,6 +46,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests, #endif mr_factory{"Managed", &make_managed}, mr_factory{"Pool", &make_pool}, + mr_factory{"HostPinnedPool", &make_host_pinned_pool}, mr_factory{"Arena", &make_arena}, mr_factory{"Binning", &make_binning}), [](auto const& info) 
{ return info.param.name; }); diff --git a/tests/mr/device/test_utils.hpp b/tests/mr/device/test_utils.hpp new file mode 100644 index 000000000..932a72a7e --- /dev/null +++ b/tests/mr/device/test_utils.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace rmm::test { + +/** + * @brief Returns if a pointer points to a device memory or managed memory + * allocation. + */ +inline bool is_device_accessible_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or + ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)); +} + +inline bool is_host_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return attributes.type == cudaMemoryTypeHost; +} + +inline bool is_properly_aligned(void* ptr) +{ + if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); } + return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT); +} + +} // namespace rmm::test From 6c904f7b960c3b4944ea19281a6c7e0e16b55275 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 23 Jan 2024 20:27:54 +1100 Subject: [PATCH 21/23] Make device_memory_resource::do_get_mem_info() and supports_get_mem_info() nonvirtual. Remove derived implementations and calls in RMM (#1430) Closes #1426 As part of #1388, this PR contributes to deprecating and removing all `get_mem_info` functionality from memory resources. This first PR makes these methods optional without deprecating them. - Makes `rmm::mr::device_memory_resource::supports_get_mem_info()` nonvirtual (and always return false) - Makes `rmm::mr::device_memory_resource::do_get_mem_info()` nonvirtual (and always return `{0, 0}`). - Removes all derived implementations of the above. - Removes all calls to the above. 
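With these defaults in place, a minimal derived resource only needs the allocation hooks. A sketch (`passthrough_resource` is hypothetical and simply delegates to the current device resource; it is not part of this patch):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <cstddef>

// No do_get_mem_info()/supports_get_mem_info() overrides are required any
// more: the base class now returns {0, 0} and false by default.
class passthrough_resource final : public rmm::mr::device_memory_resource {
 public:
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

 private:
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    return rmm::mr::get_current_device_resource()->allocate(bytes, stream);
  }
  void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    rmm::mr::get_current_device_resource()->deallocate(ptr, bytes, stream);
  }
};
```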
Authors: - Mark Harris (https://github.com/harrism) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/rmm/pull/1430 --- .../utilities/simulated_memory_resource.hpp | 21 +-------------- .../mr/device/aligned_resource_adaptor.hpp | 26 ------------------- .../rmm/mr/device/arena_memory_resource.hpp | 19 -------------- .../rmm/mr/device/binning_memory_resource.hpp | 21 --------------- .../mr/device/callback_memory_resource.hpp | 8 +----- .../mr/device/cuda_async_memory_resource.hpp | 20 -------------- .../cuda_async_view_memory_resource.hpp | 20 -------------- .../rmm/mr/device/cuda_memory_resource.hpp | 24 +---------------- .../rmm/mr/device/device_memory_resource.hpp | 7 +++-- .../failure_callback_resource_adaptor.hpp | 26 +------------------ .../mr/device/fixed_size_memory_resource.hpp | 23 +--------------- .../mr/device/limiting_resource_adaptor.hpp | 24 ----------------- .../mr/device/logging_resource_adaptor.hpp | 26 +------------------ .../rmm/mr/device/managed_memory_resource.hpp | 26 +------------------ include/rmm/mr/device/owning_wrapper.hpp | 24 +---------------- .../rmm/mr/device/pool_memory_resource.hpp | 24 +---------------- .../mr/device/statistics_resource_adaptor.hpp | 25 +----------------- .../device/thread_safe_resource_adaptor.hpp | 26 +------------------ .../mr/device/tracking_resource_adaptor.hpp | 25 +----------------- .../rmm/mr/host/pinned_memory_resource.hpp | 20 -------------- tests/device_check_resource_adaptor.hpp | 13 +--------- tests/mock_resource.hpp | 2 -- tests/mr/device/adaptor_tests.cpp | 16 +----------- tests/mr/device/aligned_mr_tests.cpp | 6 ----- tests/mr/device/arena_mr_tests.cpp | 4 --- tests/mr/device/failure_callback_mr_tests.cpp | 6 ----- tests/mr/device/mr_tests.cpp | 25 ------------------ tests/mr/device/pool_mr_tests.cpp | 1 - 28 files changed, 19 insertions(+), 489 deletions(-) diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp index b7965a021..00f6a5649 100644 --- a/benchmarks/utilities/simulated_memory_resource.hpp +++ b/benchmarks/utilities/simulated_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,13 +59,6 @@ class simulated_memory_resource final : public device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return false; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return false - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } - private: /** * @brief Allocates memory of size at least `bytes`. @@ -95,18 +88,6 @@ class simulated_memory_resource final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override {} - /** - * @brief Get free and available memory for memory resource. - * - * @param stream to execute on. - * @return std::pair containing free_size and total_size of memory. 
- */ - [[nodiscard]] std::pair do_get_mem_info( - cuda_stream_view stream) const override - { - return std::make_pair(0, 0); - } - char* begin_{}; char* end_{}; }; diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index be7c3036c..dda510e52 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -96,16 +96,6 @@ class aligned_resource_adaptor final : public device_memory_resource { return upstream_->supports_streams(); } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return bool true if the upstream resource supports get_mem_info, false otherwise. - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override - { - return upstream_->supports_get_mem_info(); - } - /** * @brief The default alignment used by the adaptor. */ @@ -183,22 +173,6 @@ class aligned_resource_adaptor final : public device_memory_resource { alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_; } - /** - * @brief Get free and available memory from upstream resource. - * - * The free size may not be fully allocatable because of alignment requirements. - * - * @throws rmm::cuda_error if unable to retrieve memory info. - * - * @param stream Stream on which to get the mem info. - * @return std::pair containing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info( - cuda_stream_view stream) const override - { - return upstream_->get_mem_info(stream); - } - /** * @brief Calculate the allocation size needed from upstream to account for alignments of both the * size and the base pointer. diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 1b821b440..658a107d6 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -118,13 +118,6 @@ class arena_memory_resource final : public device_memory_resource { */ bool supports_streams() const noexcept override { return true; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return bool false. - */ - bool supports_get_mem_info() const noexcept override { return false; } - private: using global_arena = rmm::mr::detail::arena::global_arena; using arena = rmm::mr::detail::arena::arena; @@ -312,18 +305,6 @@ class arena_memory_resource final : public device_memory_resource { } } - /** - * @brief Get free and available memory for memory resource. - * - * @param stream to execute on. - * @return std::pair containing free_size and total_size of memory. - */ - std::pair do_get_mem_info( - [[maybe_unused]] cuda_stream_view stream) const override - { - return std::make_pair(0, 0); - } - /** * Dump memory to log. * diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 2a9975b18..33b92d4d7 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -106,13 +106,6 @@ class binning_memory_resource final : public device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return true; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return false - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } - /** * @brief Get the upstream memory_resource object. 
* @@ -197,20 +190,6 @@ class binning_memory_resource final : public device_memory_resource { if (res != nullptr) { res->deallocate(ptr, bytes, stream); } } - /** - * @brief Get free and available memory for memory resource - * - * @throws std::runtime_error if we could not get free / total memory - * - * @param stream the stream being executed on - * @return std::pair with available and free memory for resource - */ - [[nodiscard]] std::pair do_get_mem_info( - [[maybe_unused]] cuda_stream_view stream) const override - { - return std::make_pair(0, 0); - } - Upstream* upstream_mr_; // The upstream memory_resource from which to allocate blocks. std::vector>> owned_bin_resources_; diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp index 36802c83a..11270ebe2 100644 --- a/include/rmm/mr/device/callback_memory_resource.hpp +++ b/include/rmm/mr/device/callback_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -138,13 +138,7 @@ class callback_memory_resource final : public device_memory_resource { deallocate_callback_(ptr, bytes, stream, deallocate_callback_arg_); } - [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override - { - throw std::runtime_error("cannot get free / total memory"); - } - [[nodiscard]] bool supports_streams() const noexcept override { return false; } - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } allocate_callback_t allocate_callback_; deallocate_callback_t deallocate_callback_; diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index f8295c6f6..b1d010cd6 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -164,13 +164,6 @@ class cuda_async_memory_resource final : public device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return true; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return false - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } - private: #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT cuda_async_view_memory_resource pool_{}; @@ -232,19 +225,6 @@ class cuda_async_memory_resource final : public device_memory_resource { return async_mr != nullptr; #endif } - - /** - * @brief Get free and available memory for memory resource - * - * @throws rmm::cuda_error if unable to retrieve memory info. - * - * @return std::pair contaiing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info( - rmm::cuda_stream_view) const override - { - return std::make_pair(0, 0); - } }; /** @} */ // end of group diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 562944669..553c9a708 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -99,13 +99,6 @@ class cuda_async_view_memory_resource final : public device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return true; } - /** - * @brief Query whether the resource supports the get_mem_info API. 
- * - * @return true - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } - private: #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT cudaMemPool_t cuda_pool_handle_{}; @@ -169,19 +162,6 @@ class cuda_async_view_memory_resource final : public device_memory_resource { { return dynamic_cast(&other) != nullptr; } - - /** - * @brief Get free and available memory for memory resource - * - * @throws rmm::cuda_error if unable to retrieve memory info. - * - * @return std::pair contaiing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info( - rmm::cuda_stream_view) const override - { - return std::make_pair(0, 0); - } }; /** @} */ // end of group diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index 256899776..284e49793 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,13 +51,6 @@ class cuda_memory_resource final : public device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return false; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return true - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } - private: /** * @brief Allocates memory of size at least \p bytes. @@ -108,21 +101,6 @@ class cuda_memory_resource final : public device_memory_resource { { return dynamic_cast(&other) != nullptr; } - - /** - * @brief Get free and available memory for memory resource - * - * @throws rmm::cuda_error if unable to retrieve memory info. - * - * @return std::pair contaiing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override - { - std::size_t free_size{}; - std::size_t total_size{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); - return std::make_pair(free_size, total_size); - } }; /** @} */ // end of group } // namespace rmm::mr diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 55006f9b0..fd8a71c80 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -306,7 +306,7 @@ class device_memory_resource { * * @return bool true if the resource supports get_mem_info, false otherwise. */ - [[nodiscard]] virtual bool supports_get_mem_info() const noexcept = 0; + [[nodiscard]] virtual bool supports_get_mem_info() const noexcept { return false; }; /** * @brief Queries the amount of free and total memory for the resource. 
@@ -384,7 +384,10 @@ class device_memory_resource { * @return std::pair with available and free memory for resource */ [[nodiscard]] virtual std::pair do_get_mem_info( - cuda_stream_view stream) const = 0; + cuda_stream_view stream) const + { + return {0, 0}; + } }; static_assert(cuda::mr::async_resource_with); /** @} */ // end of group diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index f8cbe8597..0f8c4b020 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,16 +135,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource { return upstream_->supports_streams(); } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return bool true if the upstream resource supports get_mem_info, false otherwise. - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override - { - return upstream_->supports_get_mem_info(); - } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -199,20 +189,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource { : upstream_->is_equal(other); } - /** - * @brief Get free and available memory from upstream resource. - * - * @throws rmm::cuda_error if unable to retrieve memory info. - * - * @param stream Stream on which to get the mem info. - * @return std::pair contaiing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info( - cuda_stream_view stream) const override - { - return upstream_->get_mem_info(stream); - } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests failure_callback_t callback_; void* callback_arg_; diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 91cc95c53..b7a3babee 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -104,13 +104,6 @@ class fixed_size_memory_resource */ [[nodiscard]] bool supports_streams() const noexcept override { return true; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return false - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } - /** * @brief Get the upstream memory_resource object. * @@ -211,20 +204,6 @@ class fixed_size_memory_resource return block_type{ptr}; } - /** - * @brief Get free and available memory for memory resource - * - * @throws std::runtime_error if we could not get free / total memory - * - * @param stream the stream being executed on - * @return std::pair with available and free memory for resource - */ - [[nodiscard]] std::pair do_get_mem_info( - [[maybe_unused]] cuda_stream_view stream) const override - { - return std::make_pair(0, 0); - } - /** * @brief free all memory allocated using the upstream resource. 
diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
index f8cbe8597..0f8c4b020 100644
--- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
+++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -135,16 +135,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
     return upstream_->supports_streams();
   }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
  private:
   /**
    * @brief Allocates memory of size at least `bytes` using the upstream
@@ -199,20 +189,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource {
     : upstream_->is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    cuda_stream_view stream) const override
-  {
-    return upstream_->get_mem_info(stream);
-  }
-
   Upstream* upstream_;  // the upstream resource used for satisfying allocation requests
   failure_callback_t callback_;
   void* callback_arg_;
diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp
index 91cc95c53..b7a3babee 100644
--- a/include/rmm/mr/device/fixed_size_memory_resource.hpp
+++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp
@@ -104,13 +104,6 @@ class fixed_size_memory_resource
    */
   [[nodiscard]] bool supports_streams() const noexcept override { return true; }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return false
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }
-
   /**
    * @brief Get the upstream memory_resource object.
    *
@@ -211,20 +204,6 @@ class fixed_size_memory_resource
     return block_type{ptr};
   }
 
-  /**
-   * @brief Get free and available memory for memory resource
-   *
-   * @throws std::runtime_error if we could not get free / total memory
-   *
-   * @param stream the stream being executed on
-   * @return std::pair with available and free memory for resource
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    [[maybe_unused]] cuda_stream_view stream) const override
-  {
-    return std::make_pair(0, 0);
-  }
-
   /**
    * @brief free all memory allocated using the upstream resource.
   *
@@ -244,7 +223,7 @@ class fixed_size_memory_resource
   {
     lock_guard lock(this->get_mutex());
 
-    auto const [free, total] = get_upstream()->get_mem_info(rmm::cuda_stream_default);
+    auto const [free, total] = rmm::available_device_memory();
     std::cout << "GPU free memory: " << free << " total: " << total << "\n";
     std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp
index 2123c3cac..e10a453c5 100644
--- a/include/rmm/mr/device/limiting_resource_adaptor.hpp
+++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp
@@ -88,16 +88,6 @@ class limiting_resource_adaptor final : public device_memory_resource {
     return upstream_->supports_streams();
   }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
   /**
    * @brief Query the number of bytes that have been allocated. Note that
    * this can not be used to know how large of an allocation is possible due
@@ -178,20 +168,6 @@ class limiting_resource_adaptor final : public device_memory_resource {
     return upstream_->is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    [[maybe_unused]] cuda_stream_view stream) const override
-  {
-    return {allocation_limit_ - allocated_bytes_, allocation_limit_};
-  }
-
   // maximum bytes this allocator is allowed to allocate.
   std::size_t allocation_limit_;
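The values the removed override computed ({limit - allocated, limit}) remain available through this adaptor's public accessors. A sketch follows, assuming the get_allocation_limit() and get_allocated_bytes() accessors exist on the adaptor (an assumption; they are not shown in this hunk), with illustrative sizes:

#include <cstddef>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>

int main()
{
  rmm::mr::cuda_memory_resource upstream;
  // Cap this adaptor at 1 GiB of outstanding allocations.
  rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> limited{
    &upstream, std::size_t{1} << 30};

  void* ptr = limited.allocate(std::size_t{1} << 20);

  // Equivalent of the removed do_get_mem_info(): remaining = limit - allocated.
  auto const remaining = limited.get_allocation_limit() - limited.get_allocated_bytes();

  limited.deallocate(ptr, std::size_t{1} << 20);
  return remaining > 0 ? 0 : 1;
}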
diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp
index 781571157..455cde4c6 100644
--- a/include/rmm/mr/device/logging_resource_adaptor.hpp
+++ b/include/rmm/mr/device/logging_resource_adaptor.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -154,16 +154,6 @@ class logging_resource_adaptor final : public device_memory_resource {
     return upstream_->supports_streams();
   }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
   /**
    * @brief Flush logger contents.
    */
@@ -295,20 +285,6 @@ class logging_resource_adaptor final : public device_memory_resource {
     return upstream_->is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    cuda_stream_view stream) const override
-  {
-    return upstream_->get_mem_info(stream);
-  }
-
   // make_logging_adaptor needs access to private get_default_filename
   template <typename T>
   // NOLINTNEXTLINE(readability-redundant-declaration)
diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp
index dfa7710bf..5b0a80426 100644
--- a/include/rmm/mr/device/managed_memory_resource.hpp
+++ b/include/rmm/mr/device/managed_memory_resource.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -51,13 +51,6 @@ class managed_memory_resource final : public device_memory_resource {
    */
   [[nodiscard]] bool supports_streams() const noexcept override { return false; }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return true
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }
-
  private:
   /**
    * @brief Allocates memory of size at least \p bytes.
@@ -112,23 +105,6 @@ class managed_memory_resource final : public device_memory_resource {
   {
     return dynamic_cast<managed_memory_resource const*>(&other) != nullptr;
   }
-
-  /**
-   * @brief Get free and available memory for memory resource
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info
-   *
-   * @param stream to execute on
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    [[maybe_unused]] cuda_stream_view stream) const override
-  {
-    std::size_t free_size{};
-    std::size_t total_size{};
-    RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
-    return std::make_pair(free_size, total_size);
-  }
 };
 
 /** @} */  // end of group
diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp
index da513796d..7dd160265 100644
--- a/include/rmm/mr/device/owning_wrapper.hpp
+++ b/include/rmm/mr/device/owning_wrapper.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -165,14 +165,6 @@ class owning_wrapper : public device_memory_resource {
     return wrapped().supports_streams();
   }
 
-  /**
-   * @briefreturn{true if the wrapped resource supports get_mem_info, false otherwise}
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override
-  {
-    return wrapped().supports_get_mem_info();
-  }
-
  private:
   /**
    * @brief Allocates memory using the wrapped resource.
@@ -220,20 +212,6 @@ class owning_wrapper : public device_memory_resource {
     return wrapped().is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    cuda_stream_view stream) const override
-  {
-    return wrapped().get_mem_info(stream);
-  }
-
   upstream_tuple upstreams_;           ///< The owned upstream resources
   std::unique_ptr<Resource> wrapped_;  ///< The wrapped resource that uses the upstreams
 };
diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp
index 63239e750..44f8f96c4 100644
--- a/include/rmm/mr/device/pool_memory_resource.hpp
+++ b/include/rmm/mr/device/pool_memory_resource.hpp
@@ -236,13 +236,6 @@ class pool_memory_resource final
    */
   [[nodiscard]] bool supports_streams() const noexcept override { return true; }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool false
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }
-
   /**
    * @brief Get the upstream memory_resource object.
    *
@@ -487,7 +480,7 @@ class pool_memory_resource final
   {
     lock_guard lock(this->get_mutex());
 
-    auto const [free, total] = upstream_mr_->get_mem_info(rmm::cuda_stream_default);
+    auto const [free, total] = rmm::available_device_memory();
     std::cout << "GPU free memory: " << free << " total: " << total << "\n";
     std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
@@ -528,21 +521,6 @@ class pool_memory_resource final
     return {largest, total};
   }
 
-  /**
-   * @brief Get free and available memory for memory resource
-   *
-   * @throws nothing
-   *
-   * @param stream to execute on
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    cuda_stream_view stream) const override
-  {
-    // TODO implement this
-    return {0, 0};
-  }
-
  private:
   Upstream* upstream_mr_;  // The "heap" to allocate the pool from
   std::size_t current_pool_size_{};
diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp
index dd186efc0..ed38a20f4 100644
--- a/include/rmm/mr/device/statistics_resource_adaptor.hpp
+++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -119,16 +119,6 @@ class statistics_resource_adaptor final : public device_memory_resource {
    */
   bool supports_streams() const noexcept override { return upstream_->supports_streams(); }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
-   */
-  bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
   /**
    * @brief Returns a `counter` struct for this adaptor containing the current,
    * peak, and total number of allocated bytes for this
@@ -222,19 +212,6 @@ class statistics_resource_adaptor final : public device_memory_resource {
     : upstream_->is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
-  {
-    return upstream_->get_mem_info(stream);
-  }
-
   counter bytes_;        // peak, current and total allocated bytes
   counter allocations_;  // peak, current and total allocation count
   std::shared_timed_mutex mutable mtx_;  // mutex for thread safe access to allocations_
diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
index 13184b257..15ad3f0a5 100644
--- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
+++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -76,16 +76,6 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
    */
   bool supports_streams() const noexcept override { return upstream_->supports_streams(); }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
-   */
-  bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
  private:
   /**
    * @brief Allocates memory of size at least `bytes` using the upstream
@@ -134,20 +124,6 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
     return upstream_->is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
-  {
-    lock_t lock(mtx);
-    return upstream_->get_mem_info(stream);
-  }
-
   std::mutex mutable mtx;  // mutex for thread safe access to upstream
   Upstream* upstream_;     ///< The upstream resource used for satisfying allocation requests
 };
diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp
index 271ccab23..2ad88f079 100644
--- a/include/rmm/mr/device/tracking_resource_adaptor.hpp
+++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -118,16 +118,6 @@ class tracking_resource_adaptor final : public device_memory_resource {
    */
   bool supports_streams() const noexcept override { return upstream_->supports_streams(); }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
-   */
-  bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
   /**
    * @brief Get the outstanding allocations map
    *
@@ -277,19 +267,6 @@ class tracking_resource_adaptor final : public device_memory_resource {
     : upstream_->is_equal(other);
   }
 
-  /**
-   * @brief Get free and available memory from upstream resource.
-   *
-   * @throws rmm::cuda_error if unable to retrieve memory info.
-   *
-   * @param stream Stream on which to get the mem info.
-   * @return std::pair contaiing free_size and total_size of memory
-   */
-  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
-  {
-    return upstream_->get_mem_info(stream);
-  }
-
   bool capture_stacks_;  // whether or not to capture call stacks
   std::map<void*, allocation_info> allocations_;  // map of active allocations
   std::atomic<std::size_t> allocated_bytes_;      // number of bytes currently allocated
diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp
index cb8524999..9f68b70fa 100644
--- a/include/rmm/mr/host/pinned_memory_resource.hpp
+++ b/include/rmm/mr/host/pinned_memory_resource.hpp
@@ -56,26 +56,6 @@ class pinned_memory_resource final : public host_memory_resource {
    */
   [[nodiscard]] bool supports_streams() const noexcept { return false; }
 
-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return bool false.
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept { return false; }
-
-  /**
-   * @brief Queries the amount of free and total memory for the resource.
-   *
-   * @param stream the stream whose memory manager we want to retrieve
-   *
-   * @returns a pair containing the free memory in bytes in .first and total amount of memory in
-   * .second
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> get_mem_info(cuda_stream_view stream) const
-  {
-    return std::make_pair(0, 0);
-  }
-
   /**
    * @brief Pretend to support the allocate_async interface, falling back to stream 0
    *
diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp
index f9ad4cf70..6ff82e77a 100644
--- a/tests/device_check_resource_adaptor.hpp
+++ b/tests/device_check_resource_adaptor.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,11 +32,6 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour
     return upstream_->supports_streams();
   }
 
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override
-  {
-    return upstream_->supports_get_mem_info();
-  }
-
   [[nodiscard]] device_memory_resource* get_upstream() const noexcept { return upstream_; }
 
  private:
@@ -66,12 +61,6 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour
     return upstream_->is_equal(other);
   }
 
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    rmm::cuda_stream_view stream) const override
-  {
-    return upstream_->get_mem_info(stream);
-  }
-
   rmm::cuda_device_id device_id;
   rmm::mr::device_memory_resource* upstream_{};
 };
diff --git a/tests/mock_resource.hpp b/tests/mock_resource.hpp
index 0436e2a2a..d8eb4e5b9 100644
--- a/tests/mock_resource.hpp
+++ b/tests/mock_resource.hpp
@@ -24,11 +24,9 @@ namespace rmm::test {
 class mock_resource : public rmm::mr::device_memory_resource {
  public:
   MOCK_METHOD(bool, supports_streams, (), (const, override, noexcept));
-  MOCK_METHOD(bool, supports_get_mem_info, (), (const, override, noexcept));
   MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override));
   MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override));
   using size_pair = std::pair<std::size_t, std::size_t>;
-  MOCK_METHOD(size_pair, do_get_mem_info, (cuda_stream_view), (const, override));
 };
 
 }  // namespace rmm::test
diff --git a/tests/mr/device/adaptor_tests.cpp b/tests/mr/device/adaptor_tests.cpp
index 98fc3a429..e43818091 100644
--- a/tests/mr/device/adaptor_tests.cpp
+++ b/tests/mr/device/adaptor_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -147,20 +147,6 @@ TYPED_TEST(AdaptorTest, SupportsStreams)
   EXPECT_EQ(this->mr->supports_streams(), this->cuda.supports_streams());
 }
 
-TYPED_TEST(AdaptorTest, MemInfo)
-{
-  EXPECT_EQ(this->mr->supports_get_mem_info(), this->cuda.supports_get_mem_info());
-
-  auto [free, total] = this->mr->get_mem_info(rmm::cuda_stream_default);
-
-  if (this->mr->supports_get_mem_info()) {
-    EXPECT_NE(total, 0);
-  } else {
-    EXPECT_EQ(free, 0);
-    EXPECT_EQ(total, 0);
-  }
-}
-
 TYPED_TEST(AdaptorTest, AllocFree)
 {
   void* ptr{nullptr};
diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp
index 5fbb4b8f1..4d149c182 100644
--- a/tests/mr/device/aligned_mr_tests.cpp
+++ b/tests/mr/device/aligned_mr_tests.cpp
@@ -72,12 +72,6 @@ TEST(AlignedTest, SupportsGetMemInfo)
 {
   mock_resource mock;
   aligned_mock mr{&mock};
-
-  EXPECT_CALL(mock, supports_get_mem_info()).WillOnce(Return(true));
-  EXPECT_TRUE(mr.supports_get_mem_info());
-
-  EXPECT_CALL(mock, supports_get_mem_info()).WillOnce(Return(false));
-  EXPECT_FALSE(mr.supports_get_mem_info());
 }
 
 TEST(AlignedTest, DefaultAllocationAlignmentPassthrough)
diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp
index 1068e0cf0..fbd96599e 100644
--- a/tests/mr/device/arena_mr_tests.cpp
+++ b/tests/mr/device/arena_mr_tests.cpp
@@ -596,10 +596,6 @@ TEST_F(ArenaTest, FeatureSupport)  // NOLINT
 {
   arena_mr mr{rmm::mr::get_current_device_resource(), 1_MiB};
   EXPECT_TRUE(mr.supports_streams());
-  EXPECT_FALSE(mr.supports_get_mem_info());
-  auto [free, total] = mr.get_mem_info(rmm::cuda_stream_default);
-  EXPECT_EQ(free, 0);
-  EXPECT_EQ(total, 0);
 }
 
 }  // namespace
diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp
index 79acd5c7e..12fbff268 100644
--- a/tests/mr/device/failure_callback_mr_tests.cpp
+++ b/tests/mr/device/failure_callback_mr_tests.cpp
@@ -61,14 +61,8 @@ class always_throw_memory_resource final : public mr::device_memory_resource {
     throw ExceptionType{"foo"};
   }
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override{};
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    cuda_stream_view stream) const override
-  {
-    return {0, 0};
-  }
 
   [[nodiscard]] bool supports_streams() const noexcept override { return false; }
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }
 };
 
 TEST(FailureCallbackTest, DifferentExceptionTypes)
diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp
index bf513adda..5f9be9f54 100644
--- a/tests/mr/device/mr_tests.cpp
+++ b/tests/mr/device/mr_tests.cpp
@@ -97,31 +97,6 @@ TEST_P(mr_test, SupportsStreams)
   }
 }
 
-TEST_P(mr_test, GetMemInfo)
-{
-  if (this->mr->supports_get_mem_info()) {
-    const auto allocation_size{16 * 256};
-    {
-      auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{});
-      EXPECT_TRUE(free >= allocation_size);
-    }
-
-    void* ptr{nullptr};
-    ptr = this->mr->allocate(allocation_size);
-
-    {
-      auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{});
-      EXPECT_TRUE(free >= allocation_size);
-    }
-
-    this->mr->deallocate(ptr, allocation_size);
-  } else {
-    auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{});
-    EXPECT_EQ(free, 0);
-    EXPECT_EQ(total, 0);
-  }
-}
-
 // Simple reproducer for https://github.com/rapidsai/rmm/issues/861
 TEST_P(mr_test, AllocationsAreDifferentDefaultStream)
 {
diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp
index a2793386f..7193ef301 100644
--- a/tests/mr/device/pool_mr_tests.cpp
+++ b/tests/mr/device/pool_mr_tests.cpp
@@ -213,7 +213,6 @@ class fake_async_resource {
   // To model stream_resource
   [[nodiscard]] bool supports_streams() const noexcept { return false; }
-  [[nodiscard]] bool supports_get_mem_info() const noexcept { return false; }
 
  private:
   void* do_allocate(std::size_t bytes, cuda_stream_view) { return nullptr; }
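The hunks above that touch the pool and fixed-size debug printing replace per-resource get_mem_info() calls with the free function rmm::available_device_memory(). A sketch of the replacement query follows, assuming the function is reachable via rmm/cuda_device.hpp in 24.02 (the header path is an assumption; it is not shown in this patch):

#include <rmm/cuda_device.hpp>

#include <iostream>

int main()
{
  // Free-standing replacement for the removed per-resource get_mem_info():
  // queries free and total memory of the current CUDA device.
  auto const [free, total] = rmm::available_device_memory();
  std::cout << "GPU free memory: " << free << " total: " << total << "\n";
  return 0;
}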
From f32d35b48fe9c7ad680cd2c78535a2d463d2e73b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Tue, 6 Feb 2024 17:40:07 -0800
Subject: [PATCH 22/23] Exclude tests from builds (#1459)

---
 python/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 1e901b1ab..204d81c3c 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -114,6 +114,7 @@ build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"
 cmake.minimum-version = "3.26.4"
 ninja.make-fallback = true
+sdist.exclude = ["*tests*"]
 sdist.reproducible = true
 wheel.packages = ["rmm"]

From 0f1ff2749cdec780227d5e75f8e10e608007054a Mon Sep 17 00:00:00 2001
From: Ray Douglass
Date: Mon, 12 Feb 2024 15:44:04 -0500
Subject: [PATCH 23/23] Update Changelog [skip ci]

---
 CHANGELOG.md | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 930ad9490..9d9ccfd94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,47 @@
+# RMM 24.02.00 (12 Feb 2024)
+
+## 🚨 Breaking Changes
+
+- Make device_memory_resource::do_get_mem_info() and supports_get_mem_info() not pure virtual. Remove derived implementations and calls in RMM ([#1430](https://github.com/rapidsai/rmm/pull/1430)) [@harrism](https://github.com/harrism)
+- Deprecate detail::available_device_memory, most detail/aligned.hpp utilities, and optional pool_memory_resource initial size ([#1424](https://github.com/rapidsai/rmm/pull/1424)) [@harrism](https://github.com/harrism)
+- Require explicit pool size in `pool_memory_resource` and move some things out of detail namespace ([#1417](https://github.com/rapidsai/rmm/pull/1417)) [@harrism](https://github.com/harrism)
+- Remove HTML builds of librmm ([#1415](https://github.com/rapidsai/rmm/pull/1415)) [@vyasr](https://github.com/vyasr)
+- Update to CCCL 2.2.0. ([#1404](https://github.com/rapidsai/rmm/pull/1404)) [@bdice](https://github.com/bdice)
+- Switch to scikit-build-core ([#1287](https://github.com/rapidsai/rmm/pull/1287)) [@vyasr](https://github.com/vyasr)
+
+## 🐛 Bug Fixes
+
+- Exclude tests from builds ([#1459](https://github.com/rapidsai/rmm/pull/1459)) [@vyasr](https://github.com/vyasr)
+- Update CODEOWNERS ([#1410](https://github.com/rapidsai/rmm/pull/1410)) [@raydouglass](https://github.com/raydouglass)
+- Correct signatures for torch allocator plug in ([#1407](https://github.com/rapidsai/rmm/pull/1407)) [@wence-](https://github.com/wence-)
+- Fix Arena MR to support simultaneous access by PTDS and other streams ([#1395](https://github.com/rapidsai/rmm/pull/1395)) [@tgravescs](https://github.com/tgravescs)
+- Fix else-after-throw clang tidy error ([#1391](https://github.com/rapidsai/rmm/pull/1391)) [@harrism](https://github.com/harrism)
+
+## 📖 Documentation
+
+- remove references to setup.py in docs ([#1420](https://github.com/rapidsai/rmm/pull/1420)) [@jameslamb](https://github.com/jameslamb)
+- Remove HTML builds of librmm ([#1415](https://github.com/rapidsai/rmm/pull/1415)) [@vyasr](https://github.com/vyasr)
+- Update GPU support docs to drop Pascal ([#1413](https://github.com/rapidsai/rmm/pull/1413)) [@harrism](https://github.com/harrism)
+
+## 🚀 New Features
+
+- Make device_memory_resource::do_get_mem_info() and supports_get_mem_info() not pure virtual. Remove derived implementations and calls in RMM ([#1430](https://github.com/rapidsai/rmm/pull/1430)) [@harrism](https://github.com/harrism)
+- Deprecate detail::available_device_memory, most detail/aligned.hpp utilities, and optional pool_memory_resource initial size ([#1424](https://github.com/rapidsai/rmm/pull/1424)) [@harrism](https://github.com/harrism)
+- Add a host-pinned memory resource that can be used as upstream for `pool_memory_resource`. ([#1392](https://github.com/rapidsai/rmm/pull/1392)) [@harrism](https://github.com/harrism)
+
+## 🛠️ Improvements
+
+- Remove usages of rapids-env-update ([#1423](https://github.com/rapidsai/rmm/pull/1423)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Refactor CUDA versions in dependencies.yaml. ([#1422](https://github.com/rapidsai/rmm/pull/1422)) [@bdice](https://github.com/bdice)
+- Require explicit pool size in `pool_memory_resource` and move some things out of detail namespace ([#1417](https://github.com/rapidsai/rmm/pull/1417)) [@harrism](https://github.com/harrism)
+- Update dependencies.yaml to support CUDA 12.*. ([#1414](https://github.com/rapidsai/rmm/pull/1414)) [@bdice](https://github.com/bdice)
+- Define python dependency range as a matrix fallback. ([#1409](https://github.com/rapidsai/rmm/pull/1409)) [@bdice](https://github.com/bdice)
+- Use latest cuda-python within CUDA major version. ([#1406](https://github.com/rapidsai/rmm/pull/1406)) [@bdice](https://github.com/bdice)
+- Update to CCCL 2.2.0. ([#1404](https://github.com/rapidsai/rmm/pull/1404)) [@bdice](https://github.com/bdice)
+- Remove RMM_BUILD_WHEELS and standardize Python builds ([#1401](https://github.com/rapidsai/rmm/pull/1401)) [@vyasr](https://github.com/vyasr)
+- Update to fmt 10.1.1 and spdlog 1.12.0. ([#1374](https://github.com/rapidsai/rmm/pull/1374)) [@bdice](https://github.com/bdice)
+- Switch to scikit-build-core ([#1287](https://github.com/rapidsai/rmm/pull/1287)) [@vyasr](https://github.com/vyasr)
+
 # RMM 23.12.00 (6 Dec 2023)
 
 ## 🚨 Breaking Changes
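For the `pool_memory_resource` breaking change listed above (#1417), the initial pool size must now be passed explicitly. A sketch follows, under the assumptions that the 24.02 constructor takes (upstream, initial_size) and that the initial size must be a 256-byte multiple; both are assumptions, and the sizes used are illustrative:

#include <cstddef>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource upstream;

  // Pass the initial pool size explicitly (here, half of currently free
  // device memory, aligned down to a 256-byte multiple).
  auto const [free, total] = rmm::available_device_memory();
  auto const initial_size  = (free / 2) & ~std::size_t{255};

  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream, initial_size};

  void* ptr = pool.allocate(4096);
  pool.deallocate(ptr, 4096);
  return 0;
}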