Skip to content

Commit

Permalink
[Fix] Duplicated node issue on Cagra NN-descent
Browse files (browse the repository at this point in the history)
- fix issue: #626
  • Loading branch information
rhdong committed Feb 13, 2025
1 parent 49298b2 commit d6f6236
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 9 deletions.
32 changes: 23 additions & 9 deletions cpp/src/neighbors/detail/nn_descent.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,13 @@ int insert_to_ordered_list(InternalID_t<Index_t>* list,
int idx_insert = width;
bool position_found = false;
for (int i = 0; i < width; i++) {
if (list[i].id() == neighb_id.id()) { return width; }
if (list[i].id() == neighb_id.id()) {
if (dist_list[i] == std::numeric_limits<DistData_t>::max()) {
idx_insert = i;
dist_list[i] = dist;
}
return idx_insert;
}
if (!position_found && dist_list[i] > dist) {
idx_insert = i;
position_found = true;
Expand Down Expand Up @@ -1047,24 +1053,32 @@ void GnndGraph<Index_t>::init_random_graph()
for (size_t seg_idx = 0; seg_idx < static_cast<size_t>(num_segments); seg_idx++) {
// random sequence (range: 0~nrow)
// segment_x stores neighbors whose id % num_segments == x
std::vector<Index_t> rand_seq(nrow / num_segments);
std::vector<Index_t> rand_seq((nrow + num_segments - 1) / num_segments);
std::iota(rand_seq.begin(), rand_seq.end(), 0);
auto gen = std::default_random_engine{seg_idx};
std::shuffle(rand_seq.begin(), rand_seq.end(), gen);

#pragma omp parallel for
for (size_t i = 0; i < nrow; i++) {
size_t base_idx = i * node_degree + seg_idx * segment_size;
auto h_neighbor_list = h_graph + base_idx;
auto h_dist_list = h_dists.data_handle() + base_idx;
size_t base_idx = i * node_degree + seg_idx * segment_size;
auto h_neighbor_list = h_graph + base_idx;
auto h_dist_list = h_dists.data_handle() + base_idx;
size_t idx = base_idx;
size_t self_in_this_seg = 0;
for (size_t j = 0; j < static_cast<size_t>(segment_size); j++) {
size_t idx = base_idx + j;
Index_t id = rand_seq[idx % rand_seq.size()] * num_segments + seg_idx;
if ((size_t)id == i) {
id = rand_seq[(idx + segment_size) % rand_seq.size()] * num_segments + seg_idx;
idx++;
id = rand_seq[idx % rand_seq.size()] * num_segments + seg_idx;
self_in_this_seg = 1;
}
h_neighbor_list[j].id_with_flag() = id;
h_dist_list[j] = std::numeric_limits<DistData_t>::max();

h_neighbor_list[j].id_with_flag() =
j < (rand_seq.size() - self_in_this_seg) && size_t(id) < nrow
? id
: std::numeric_limits<Index_t>::max();
h_dist_list[j] = std::numeric_limits<DistData_t>::max();
idx++;
}
}
}
Expand Down
21 changes: 21 additions & 0 deletions cpp/tests/neighbors/ann_cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,9 @@ class AnnCagraIndexMergeTest : public ::testing::TestWithParam<AnnCagraInputs> {
(ps.k * ps.dim * 8 / 5 /*(=magic number)*/ < ps.n_rows))
GTEST_SKIP();

// IVF_PQ requires `n_rows >= n_lists`.
if (ps.n_rows < 8 && ps.build_algo == graph_build_algo::IVF_PQ) GTEST_SKIP();

size_t queries_size = ps.n_queries * ps.k;
std::vector<IdxT> indices_Cagra(queries_size);
std::vector<IdxT> indices_naive(queries_size);
Expand Down Expand Up @@ -1161,6 +1164,24 @@ inline std::vector<AnnCagraInputs> generate_inputs()
{0.995});
inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());

// Corner cases for small datasets
inputs2 = raft::util::itertools::product<AnnCagraInputs>(
{2},
{3, 5, 31, 32, 64, 101},
{1, 10},
{2}, // k
{graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT},
{search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL},
{0}, // query size
{0},
{256},
{1},
{cuvs::distance::DistanceType::L2Expanded},
{false},
{true},
{0.995});
inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());

// Varying dim and build algo.
inputs2 = raft::util::itertools::product<AnnCagraInputs>(
{100},
Expand Down

0 comments on commit d6f6236

Please sign in to comment.