From 1e4d2e7568c3ebbff4ecc85e98863ca9039c109f Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 25 Feb 2025 12:42:30 -0800 Subject: [PATCH 1/4] synchronize threading --- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 14 +++++------ cpp/src/neighbors/detail/hnsw.hpp | 25 ++++++------------- .../config/algos/cuvs_cagra_hnswlib.yaml | 2 +- 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index d6870ae1c..73baae6be 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -66,13 +66,13 @@ class hnsw_lib : public algo { struct build_param { int m; int ef_construction; - int num_threads = omp_get_num_procs(); + int num_threads = omp_get_max_threads(); }; using search_param_base = typename algo::search_param; struct search_param : public search_param_base { int ef; - int num_threads = 1; + int num_threads = omp_get_max_threads(); }; hnsw_lib(Metric metric, int dim, const build_param& param); @@ -175,12 +175,7 @@ void hnsw_lib::set_search_param(const search_param_base& param_, const void* auto param = dynamic_cast(param_); appr_alg_->ef_ = param.ef; num_threads_ = param.num_threads; - // bench_mode_ = param.metric_objective; bench_mode_ = Mode::kLatency; // TODO(achirkin): pass the benchmark mode in the algo parameters - - // Create a pool if multiple query threads have been set and the pool hasn't been created already - bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_); - if (create_pool) { thread_pool_ = std::make_shared(num_threads_); } } template @@ -192,7 +187,10 @@ void hnsw_lib::search( get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k); }; if (bench_mode_ == Mode::kLatency && num_threads_ > 1) { - thread_pool_->submit(f, batch_size); +#pragma omp parallel for num_threads(num_threads_) + for (int i = 0; i < batch_size; i++) { + f(i); + } } else { for (int i = 0; i < batch_size; i++) { f(i); diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 6ab8631d4..787646ee6 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -489,26 +489,15 @@ void search(raft::resources const& res, auto const* hnswlib_index = reinterpret_cast::type> const*>( idx.get_index()); + auto num_threads = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads; - // when num_threads == 0, automatically maximize parallelism - if (params.num_threads) { #pragma omp parallel for num_threads(params.num_threads) - for (int64_t i = 0; i < queries.extent(0); ++i) { - get_search_knn_results(hnswlib_index, - queries.data_handle() + i * queries.extent(1), - neighbors.extent(1), - neighbors.data_handle() + i * neighbors.extent(1), - distances.data_handle() + i * distances.extent(1)); - } - } else { -#pragma omp parallel for - for (int64_t i = 0; i < queries.extent(0); ++i) { - get_search_knn_results(hnswlib_index, - queries.data_handle() + i * queries.extent(1), - neighbors.extent(1), - neighbors.data_handle() + i * neighbors.extent(1), - distances.data_handle() + i * distances.extent(1)); - } + for (int64_t i = 0; i < queries.extent(0); ++i) { + get_search_knn_results(hnswlib_index, + queries.data_handle() + i * queries.extent(1), + neighbors.extent(1), + neighbors.data_handle() + i * neighbors.extent(1), + distances.data_handle() + i * distances.extent(1)); } } diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml index 063502290..bf5cd35d3 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml @@ -8,7 +8,7 @@ groups: graph_degree: [32, 64, 96, 128] intermediate_graph_degree: [32, 64, 96, 128] graph_build_algo: ["NN_DESCENT"] - hierarchy: ["none", "cpu"] + hierarchy: ["none", "cpu", "gpu"] ef_construction: [64, 128, 256, 512] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] From 620b7ed5ba1f03fbf05548e32457e8a463efcb1f Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 25 Feb 2025 14:27:51 -0800 Subject: [PATCH 2/4] unused var --- cpp/src/neighbors/detail/hnsw.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 787646ee6..1aff6229b 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -491,7 +491,7 @@ void search(raft::resources const& res, idx.get_index()); auto num_threads = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads; -#pragma omp parallel for num_threads(params.num_threads) +#pragma omp parallel for num_threads(num_threads) for (int64_t i = 0; i < queries.extent(0); ++i) { get_search_knn_results(hnswlib_index, queries.data_handle() + i * queries.extent(1), From 431ece06138f39c301f9c2c92826aaafd352996c Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 26 Feb 2025 15:46:26 -0800 Subject: [PATCH 3/4] remove usage of thread pool in hnsw wrapper --- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 22 ++++----------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 73baae6be..887bd85c9 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -16,7 +16,6 @@ #pragma once #include "../common/ann_types.hpp" -#include "../common/thread_pool.hpp" #include "../common/util.hpp" #include @@ -114,7 +113,6 @@ class hnsw_lib : public algo { int ef_construction_; int m_; int num_threads_; - std::shared_ptr thread_pool_; Mode bench_mode_; }; @@ -150,22 +148,10 @@ void hnsw_lib::build(const T* dataset, size_t nrow) appr_alg_ = std::make_shared::type>>( space_.get(), nrow, m_, ef_construction_); - thread_pool_ = std::make_shared(num_threads_); - const size_t items_per_thread = nrow / (num_threads_ + 1); - - thread_pool_->submit( - [&](size_t i) { - if (i < items_per_thread && i % 10000 == 0) { - char buf[20]; - std::time_t now = std::time(nullptr); - std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s building %zu / %zu\n", buf, i, items_per_thread); - fflush(stdout); - } - - appr_alg_->addPoint(dataset + i * dim_, i); - }, - nrow); +#pragma omp parallel for num_threads(num_threads_) + for (size_t i = 0; i < nrow; i++) { + appr_alg_->addPoint(dataset + i * dim_, i); + } } template From b62983fa057f217a5a67e25a10ff8e02fe257305 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 27 Feb 2025 09:35:56 -0800 Subject: [PATCH 4/4] add missing header --- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 887bd85c9..7191f8a05 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -20,6 +20,8 @@ #include +#include + #include #include #include