From 08ab53b5e137beb83676aecd3742bdeefee2ad0c Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Fri, 21 Feb 2025 02:33:08 -0500 Subject: [PATCH] Minor fixes on the file permission, host allocation flag, and worker thread initialization (#637) This PR makes the following minor fixes: - Use the correct file permission flags corresponding to the `644` code. - Use the correct flag for the `cuMemHostAlloc` call. - For the thread pool, replace the `thread_local` call-once section (which may negatively affect performance; see https://github.com/rapidsai/kvikio/pull/630#discussion_r1960285383) with more idiomatic worker thread initialization function. Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/kvikio/pull/637 --- cpp/include/kvikio/defaults.hpp | 7 ++- cpp/include/kvikio/file_handle.hpp | 4 +- cpp/include/kvikio/nvtx.hpp | 16 +++---- cpp/include/kvikio/parallel_operation.hpp | 13 ++---- cpp/include/kvikio/threadpool_wrapper.hpp | 57 +++++++++++++++++++++++ cpp/src/bounce_buffer.cpp | 3 +- cpp/src/defaults.cpp | 2 +- cpp/src/nvtx.cpp | 10 ++-- cpp/tests/test_basic_io.cpp | 2 +- 9 files changed, 83 insertions(+), 31 deletions(-) create mode 100644 cpp/include/kvikio/threadpool_wrapper.hpp diff --git a/cpp/include/kvikio/defaults.hpp b/cpp/include/kvikio/defaults.hpp index 563cf09456..999de3fb90 100644 --- a/cpp/include/kvikio/defaults.hpp +++ b/cpp/include/kvikio/defaults.hpp @@ -22,10 +22,9 @@ #include #include -#include - #include #include +#include /** * @brief KvikIO namespace. @@ -60,7 +59,7 @@ CompatMode getenv_or(std::string_view env_var_name, CompatMode default_val); */ class defaults { private: - BS::thread_pool _thread_pool{get_num_threads_from_env()}; + BS_thread_pool _thread_pool{get_num_threads_from_env()}; CompatMode _compat_mode; std::size_t _task_size; std::size_t _gds_threshold; @@ -156,7 +155,7 @@ class defaults { * * @return The the default thread pool instance. */ - [[nodiscard]] static BS::thread_pool& thread_pool(); + [[nodiscard]] static BS_thread_pool& thread_pool(); /** * @brief Get the number of threads in the default thread pool. diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 3d31b96dc1..50e1c679c3 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -56,7 +56,9 @@ class FileHandle { friend class CompatModeManager; public: - static constexpr mode_t m644 = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH; + // 644 is a common setting of Unix file permissions: read and write for owner, read-only for group + // and others. + static constexpr mode_t m644 = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; FileHandle() noexcept = default; /** diff --git a/cpp/include/kvikio/nvtx.hpp b/cpp/include/kvikio/nvtx.hpp index fc401fd38a..d665af33c1 100644 --- a/cpp/include/kvikio/nvtx.hpp +++ b/cpp/include/kvikio/nvtx.hpp @@ -62,7 +62,7 @@ using nvtx_registered_string_type = nvtx3::registered_string_in get_next_color_and { static std::atomic_uint64_t call_counter{1ull}; auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); - auto& nvtx_color = nvtx_manager::get_color_by_index(call_idx); + auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); return {nvtx_color, call_idx}; } @@ -57,17 +57,10 @@ std::future submit_task(F op, std::size_t file_offset, std::size_t devPtr_offset, std::uint64_t nvtx_payload = 0ull, - nvtx_color_type nvtx_color = nvtx_manager::default_color()) + nvtx_color_type nvtx_color = NvtxManager::default_color()) { return defaults::thread_pool().submit_task([=] { KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color); - - // Rename the worker thread in the thread pool to improve clarity from nsys-ui. - // Note: This NVTX feature is currently not supported by nsys-ui. - thread_local std::once_flag call_once_per_thread; - std::call_once(call_once_per_thread, - [] { nvtx_manager::rename_current_thread("thread pool"); }); - return op(buf, size, file_offset, devPtr_offset); }); } @@ -94,7 +87,7 @@ std::future parallel_io(F op, std::size_t task_size, std::size_t devPtr_offset, std::uint64_t call_idx = 0, - nvtx_color_type nvtx_color = nvtx_manager::default_color()) + nvtx_color_type nvtx_color = NvtxManager::default_color()) { if (task_size == 0) { throw std::invalid_argument("`task_size` cannot be zero"); } diff --git a/cpp/include/kvikio/threadpool_wrapper.hpp b/cpp/include/kvikio/threadpool_wrapper.hpp new file mode 100644 index 0000000000..cb4640c047 --- /dev/null +++ b/cpp/include/kvikio/threadpool_wrapper.hpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +#include + +namespace kvikio { + +template +class thread_pool_wrapper : public pool_type { + public: + /** + * @brief Construct a new thread pool wrapper, and invoke a pre-defined initialization function in + * each worker thread. + * + * @param nthreads The number of threads to use. + */ + thread_pool_wrapper(unsigned int nthreads) : pool_type{nthreads, worker_thread_init_func} {} + + /** + * @brief Reset the number of threads in the thread pool, and invoke a pre-defined initialization + * function in each worker thread. + * + * @param nthreads The number of threads to use. + */ + void reset(unsigned int nthreads) { pool_type::reset(nthreads, worker_thread_init_func); } + + private: + inline static std::function worker_thread_init_func{[] { + KVIKIO_NVTX_SCOPED_RANGE("worker thread init", 0, NvtxManager::default_color()); + // Rename the worker thread in the thread pool to improve clarity from nsys-ui. + // Note: This NVTX feature is currently not supported by nsys-ui. + NvtxManager::rename_current_thread("thread pool"); + }}; +}; + +using BS_thread_pool = thread_pool_wrapper; + +} // namespace kvikio diff --git a/cpp/src/bounce_buffer.cpp b/cpp/src/bounce_buffer.cpp index 65ca1aaa52..6a5efea2d6 100644 --- a/cpp/src/bounce_buffer.cpp +++ b/cpp/src/bounce_buffer.cpp @@ -74,7 +74,8 @@ AllocRetain::Alloc AllocRetain::get() // If no available allocation, allocate and register a new one void* alloc{}; // Allocate page-locked host memory - CUDA_DRIVER_TRY(cudaAPI::instance().MemHostAlloc(&alloc, _size, CU_MEMHOSTREGISTER_PORTABLE)); + // Under unified addressing, host memory allocated this way is automatically portable and mapped. + CUDA_DRIVER_TRY(cudaAPI::instance().MemHostAlloc(&alloc, _size, CU_MEMHOSTALLOC_PORTABLE)); return Alloc(this, alloc, _size); } diff --git a/cpp/src/defaults.cpp b/cpp/src/defaults.cpp index d71f51fa68..fccc89e64d 100644 --- a/cpp/src/defaults.cpp +++ b/cpp/src/defaults.cpp @@ -140,7 +140,7 @@ bool defaults::is_compat_mode_preferred(CompatMode compat_mode) noexcept bool defaults::is_compat_mode_preferred() { return is_compat_mode_preferred(compat_mode()); } -BS::thread_pool& defaults::thread_pool() { return instance()->_thread_pool; } +BS_thread_pool& defaults::thread_pool() { return instance()->_thread_pool; } unsigned int defaults::thread_pool_nthreads() { return thread_pool().get_thread_count(); } diff --git a/cpp/src/nvtx.cpp b/cpp/src/nvtx.cpp index 8611533a2f..b9d7c3e146 100644 --- a/cpp/src/nvtx.cpp +++ b/cpp/src/nvtx.cpp @@ -26,13 +26,13 @@ namespace kvikio { -nvtx_manager& nvtx_manager::instance() noexcept +NvtxManager& NvtxManager::instance() noexcept { - static nvtx_manager _instance; + static NvtxManager _instance; return _instance; } -const nvtx_color_type& nvtx_manager::default_color() noexcept +const nvtx_color_type& NvtxManager::default_color() noexcept { #ifdef KVIKIO_CUDA_FOUND static nvtx_color_type default_color{nvtx3::argb{0, 255, 255, 255}}; @@ -43,7 +43,7 @@ const nvtx_color_type& nvtx_manager::default_color() noexcept #endif } -const nvtx_color_type& nvtx_manager::get_color_by_index(std::uint64_t idx) noexcept +const nvtx_color_type& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept { #ifdef KVIKIO_CUDA_FOUND constexpr std::size_t num_color{16}; @@ -72,7 +72,7 @@ const nvtx_color_type& nvtx_manager::get_color_by_index(std::uint64_t idx) noexc #endif } -void nvtx_manager::rename_current_thread(std::string_view new_name) noexcept +void NvtxManager::rename_current_thread(std::string_view new_name) noexcept { #ifdef KVIKIO_CUDA_FOUND auto tid = syscall(SYS_gettid); diff --git a/cpp/tests/test_basic_io.cpp b/cpp/tests/test_basic_io.cpp index e16bd99d83..a8f1aaf549 100644 --- a/cpp/tests/test_basic_io.cpp +++ b/cpp/tests/test_basic_io.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ +#include #include -#include "kvikio/defaults.hpp" #include "utils.hpp" using namespace kvikio::test;