diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index d6870ae1c..7191f8a05 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -16,11 +16,12 @@ #pragma once #include "../common/ann_types.hpp" -#include "../common/thread_pool.hpp" #include "../common/util.hpp" #include +#include + #include #include #include @@ -66,13 +67,13 @@ class hnsw_lib : public algo { struct build_param { int m; int ef_construction; - int num_threads = omp_get_num_procs(); + int num_threads = omp_get_max_threads(); }; using search_param_base = typename algo::search_param; struct search_param : public search_param_base { int ef; - int num_threads = 1; + int num_threads = omp_get_max_threads(); }; hnsw_lib(Metric metric, int dim, const build_param& param); @@ -114,7 +115,6 @@ class hnsw_lib : public algo { int ef_construction_; int m_; int num_threads_; - std::shared_ptr thread_pool_; Mode bench_mode_; }; @@ -150,22 +150,10 @@ void hnsw_lib::build(const T* dataset, size_t nrow) appr_alg_ = std::make_shared::type>>( space_.get(), nrow, m_, ef_construction_); - thread_pool_ = std::make_shared(num_threads_); - const size_t items_per_thread = nrow / (num_threads_ + 1); - - thread_pool_->submit( - [&](size_t i) { - if (i < items_per_thread && i % 10000 == 0) { - char buf[20]; - std::time_t now = std::time(nullptr); - std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s building %zu / %zu\n", buf, i, items_per_thread); - fflush(stdout); - } - - appr_alg_->addPoint(dataset + i * dim_, i); - }, - nrow); +#pragma omp parallel for num_threads(num_threads_) + for (size_t i = 0; i < nrow; i++) { + appr_alg_->addPoint(dataset + i * dim_, i); + } } template @@ -175,12 +163,7 @@ void hnsw_lib::set_search_param(const search_param_base& param_, const void* auto param = dynamic_cast(param_); appr_alg_->ef_ = param.ef; num_threads_ = param.num_threads; - // bench_mode_ = param.metric_objective; bench_mode_ = Mode::kLatency; // TODO(achirkin): pass the benchmark mode in the algo parameters - - // Create a pool if multiple query threads have been set and the pool hasn't been created already - bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_); - if (create_pool) { thread_pool_ = std::make_shared(num_threads_); } } template @@ -192,7 +175,10 @@ void hnsw_lib::search( get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k); }; if (bench_mode_ == Mode::kLatency && num_threads_ > 1) { - thread_pool_->submit(f, batch_size); +#pragma omp parallel for num_threads(num_threads_) + for (int i = 0; i < batch_size; i++) { + f(i); + } } else { for (int i = 0; i < batch_size; i++) { f(i); diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 6ab8631d4..1aff6229b 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -489,26 +489,15 @@ void search(raft::resources const& res, auto const* hnswlib_index = reinterpret_cast::type> const*>( idx.get_index()); + auto num_threads = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads; - // when num_threads == 0, automatically maximize parallelism - if (params.num_threads) { -#pragma omp parallel for num_threads(params.num_threads) - for (int64_t i = 0; i < queries.extent(0); ++i) { - get_search_knn_results(hnswlib_index, - queries.data_handle() + i * queries.extent(1), - neighbors.extent(1), - neighbors.data_handle() + i * neighbors.extent(1), - distances.data_handle() + i * distances.extent(1)); - } - } else { -#pragma omp parallel for - for (int64_t i = 0; i < queries.extent(0); ++i) { - get_search_knn_results(hnswlib_index, - queries.data_handle() + i * queries.extent(1), - neighbors.extent(1), - neighbors.data_handle() + i * neighbors.extent(1), - distances.data_handle() + i * distances.extent(1)); - } +#pragma omp parallel for num_threads(num_threads) + for (int64_t i = 0; i < queries.extent(0); ++i) { + get_search_knn_results(hnswlib_index, + queries.data_handle() + i * queries.extent(1), + neighbors.extent(1), + neighbors.data_handle() + i * neighbors.extent(1), + distances.data_handle() + i * distances.extent(1)); } } diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml index 063502290..bf5cd35d3 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml @@ -8,7 +8,7 @@ groups: graph_degree: [32, 64, 96, 128] intermediate_graph_degree: [32, 64, 96, 128] graph_build_algo: ["NN_DESCENT"] - hierarchy: ["none", "cpu"] + hierarchy: ["none", "cpu", "gpu"] ef_construction: [64, 128, 256, 512] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800]