Skip to content

Commit

Permalink
[Feat] Expose search with bitset C API for Brute Force
Browse files Browse the repository at this point in the history
  • Loading branch information
rhdong committed Feb 22, 2025
1 parent a1e0cc0 commit 0289b0a
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 30 deletions.
18 changes: 13 additions & 5 deletions cpp/src/neighbors/brute_force_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ void _search(cuvsResources_t res,
using neighbors_mdspan_type = raft::device_matrix_view<int64_t, int64_t, raft::row_major>;
using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, raft::row_major>;
using prefilter_mds_type = raft::device_vector_view<uint32_t, int64_t>;
using prefilter_bmp_type = cuvs::core::bitmap_view<uint32_t, int64_t>;

auto queries_mds = cuvs::core::from_dlpack<queries_mdspan_type>(queries_tensor);
auto neighbors_mds = cuvs::core::from_dlpack<neighbors_mdspan_type>(neighbors_tensor);
Expand All @@ -85,16 +84,25 @@ void _search(cuvsResources_t res,
distances_mds,
cuvs::neighbors::filtering::none_sample_filter{});
} else if (prefilter.type == BITMAP) {
auto prefilter_ptr = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
auto prefilter_mds = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
const auto prefilter = cuvs::neighbors::filtering::bitmap_filter(
using prefilter_bmp_type = cuvs::core::bitmap_view<uint32_t, int64_t>;
auto prefilter_ptr = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
auto prefilter_mds = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
const auto prefilter = cuvs::neighbors::filtering::bitmap_filter(
prefilter_bmp_type((uint32_t*)prefilter_mds.data_handle(),
queries_mds.extent(0),
index_ptr->dataset().extent(0)));
cuvs::neighbors::brute_force::search(
*res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter);
} else if (prefilter.type == BITSET) {
using prefilter_bst_type = cuvs::core::bitset_view<uint32_t, int64_t>;
auto prefilter_ptr = reinterpret_cast<DLManagedTensor*>(prefilter.addr);
auto prefilter_mds = cuvs::core::from_dlpack<prefilter_mds_type>(prefilter_ptr);
const auto prefilter = cuvs::neighbors::filtering::bitset_filter(
prefilter_bst_type((uint32_t*)prefilter_mds.data_handle(), index_ptr->dataset().extent(0)));
cuvs::neighbors::brute_force::search(
*res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter);
} else {
RAFT_FAIL("Unsupported prefilter type: BITSET");
RAFT_FAIL("Unsupported prefilter type");
}
}

Expand Down
105 changes: 85 additions & 20 deletions cpp/tests/neighbors/brute_force_c.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extern "C" void run_brute_force(int64_t n_rows,
float* index_data,
float* query_data,
uint32_t* prefilter_data,
enum cuvsFilterType filter_type,
float* distances_data,
int64_t* neighbors_data,
cuvsDistanceType metric);
Expand Down Expand Up @@ -80,6 +81,35 @@ index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector<b
return nnz;
}

/**
 * @brief Tile a packed host bitset `repeat` times, back to back, into `output`.
 *
 * Bit `i` of `input` (least-significant bit first within each word) is copied to bit
 * `r * input_bits + i` of `output` for every `r` in `[0, repeat)`. The copies are packed
 * contiguously, so they are not word-aligned unless `input_bits` is a multiple of the word width.
 *
 * @tparam bitset_t   Unsigned integer word type used to store the bits.
 * @param[in]  input      Source words; must hold at least `ceil(input_bits / word_bits)` words.
 * @param[in]  input_bits Number of valid bits in `input`.
 * @param[in]  repeat     Number of copies to write.
 * @param[out] output     Destination words. Grown when it holds fewer than
 *                        `ceil(input_bits * repeat / word_bits)` words; that many leading words
 *                        are overwritten, any words beyond them are left untouched.
 */
template <typename bitset_t = uint32_t>
void repeat_cpu_bitset(std::vector<bitset_t>& input,
                       size_t input_bits,
                       size_t repeat,
                       std::vector<bitset_t>& output)
{
  constexpr size_t bits_per_unit = sizeof(bitset_t) * 8;

  const size_t output_bits  = input_bits * repeat;
  const size_t output_units = (output_bits + bits_per_unit - 1) / bits_per_unit;

  // Never write past the end of `output`: grow it when the caller did not pre-size it.
  if (output.size() < output_units) { output.resize(output_units); }

  // Clear only the words that are about to be rewritten (indexed writes are safe for 0 words).
  for (size_t unit = 0; unit < output_units; ++unit) {
    output[unit] = 0;
  }

  size_t output_bit_index = 0;

  for (size_t r = 0; r < repeat; ++r) {
    for (size_t i = 0; i < input_bits; ++i) {
      const size_t input_unit_index = i / bits_per_unit;
      const size_t input_bit_offset = i % bits_per_unit;
      const bool bit                = (input[input_unit_index] >> input_bit_offset) & 1;

      const size_t output_unit_index = output_bit_index / bits_per_unit;
      const size_t output_bit_offset = output_bit_index % bits_per_unit;

      output[output_unit_index] |= (static_cast<bitset_t>(bit) << output_bit_offset);

      ++output_bit_index;
    }
  }
}

template <typename index_t, typename bitmap_t = uint32_t>
void cpu_convert_to_csr(std::vector<bitmap_t>& bitmap,
index_t rows,
Expand Down Expand Up @@ -205,10 +235,11 @@ void cpu_select_k(const std::vector<index_t>& indptr_h,
}
}

template <typename value_t, typename index_t, typename bitmap_t = uint32_t>
template <typename value_t, typename index_t, typename bits_t = uint32_t>
void cpu_brute_force_with_filter(value_t* query_data,
value_t* index_data,
std::vector<bitmap_t>& filter,
std::vector<bits_t>& filter,
enum cuvsFilterType filter_type,
std::vector<index_t>& out_indices_h,
std::vector<value_t>& out_values_h,
size_t n_queries,
Expand All @@ -219,11 +250,21 @@ void cpu_brute_force_with_filter(value_t* query_data,
bool select_min,
cuvsDistanceType metric)
{
std::vector<value_t> values_h(nnz);
std::vector<index_t> indices_h(nnz);
size_t actual_nnz = nnz;
size_t element = raft::ceildiv(n_queries * n_dataset, size_t(sizeof(bits_t) * 8));

std::vector<bits_t> filter_repeat_h(element);
if (filter_type == BITSET) {
actual_nnz = nnz * n_queries;
repeat_cpu_bitset(filter, n_dataset, n_queries, filter_repeat_h);
} else {
filter_repeat_h = filter;
}
std::vector<value_t> values_h(actual_nnz);
std::vector<index_t> indices_h(actual_nnz);
std::vector<index_t> indptr_h(n_queries + 1);

cpu_convert_to_csr(filter, (index_t)n_queries, (index_t)n_dataset, indices_h, indptr_h);
cpu_convert_to_csr(filter_repeat_h, (index_t)n_queries, (index_t)n_dataset, indices_h, indptr_h);

cpu_sddmm(query_data,
index_data,
Expand Down Expand Up @@ -302,10 +343,11 @@ void recall_eval(T* query_data,
min_recall));
};

template <typename T, typename IdxT, typename bitmap_t = uint32_t>
template <typename T, typename IdxT, typename bits_t = uint32_t>
void recall_eval_with_filter(T* query_data,
T* index_data,
std::vector<bitmap_t>& filter_h,
std::vector<bits_t>& filter_h,
enum cuvsFilterType filter_type,
IdxT* neighbors_d,
T* distances_d,
std::vector<T>& distances_ref_h,
Expand Down Expand Up @@ -337,6 +379,7 @@ void recall_eval_with_filter(T* query_data,
cpu_brute_force_with_filter(queries_h.data(),
indices_h.data(),
filter_h,
filter_type,
neighbors_ref_h,
distances_ref_h,
n_queries,
Expand Down Expand Up @@ -388,6 +431,7 @@ TEST(BruteForceC, BuildSearch)
index_data.data(),
query_data.data(),
filter_data,
NO_FILTER,
distances_data.data(),
neighbors_data.data(),
metric);
Expand All @@ -404,20 +448,20 @@ TEST(BruteForceC, BuildSearch)
metric);
}

TEST(BruteForceC, BuildSearchWithFilter)
void run_test_with_filter(int64_t n_samples,
int64_t n_queries,
int64_t n_dim,
uint32_t n_neighbors,
enum cuvsFilterType filter_type)
{
int64_t n_rows = 8096;
int64_t n_queries = 128;
int64_t n_dim = 32;
uint32_t n_neighbors = 8;

raft::resources handle;
auto stream = raft::resource::get_cuda_stream(handle);

float sparsity = 0.2;
int64_t n_filter = (n_queries * n_rows + 31) / 32;
float sparsity = 0.2;
int64_t n_rows_filter = (filter_type == BITMAP ? n_queries : 1);
int64_t n_filter = (n_rows_filter * n_samples + 31) / 32;
std::vector<uint32_t> filter_h(n_filter);
int64_t nnz = create_sparse_matrix(n_queries, n_rows, sparsity, filter_h);
int64_t nnz = create_sparse_matrix(n_rows_filter, n_samples, sparsity, filter_h);

cuvsDistanceType metric = L2Expanded;
bool select_min = cuvs::distance::is_min_close(metric);
Expand All @@ -427,7 +471,7 @@ TEST(BruteForceC, BuildSearchWithFilter)
select_min ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::lowest());
std::vector<int64_t> neighbors_ref_h(n_queries * n_neighbors, static_cast<int64_t>(0));

rmm::device_uvector<float> index_data(n_rows * n_dim, stream);
rmm::device_uvector<float> index_data(n_samples * n_dim, stream);
rmm::device_uvector<float> query_data(n_queries * n_dim, stream);
rmm::device_uvector<int64_t> neighbors_data(n_queries * n_neighbors, stream);
rmm::device_uvector<float> distances_data(n_queries * n_neighbors, stream);
Expand All @@ -436,33 +480,54 @@ TEST(BruteForceC, BuildSearchWithFilter)
raft::copy(neighbors_data.data(), neighbors_ref_h.data(), n_queries * n_neighbors, stream);
raft::copy(distances_data.data(), distances_ref_h.data(), n_queries * n_neighbors, stream);

generate_random_data(index_data.data(), n_rows * n_dim);
generate_random_data(index_data.data(), n_samples * n_dim);
generate_random_data(query_data.data(), n_queries * n_dim);

raft::copy(filter_data.data(), filter_h.data(), n_filter, stream);

run_brute_force(n_rows,
run_brute_force(n_samples,
n_queries,
n_dim,
n_neighbors,
index_data.data(),
query_data.data(),
filter_data.data(),
filter_type,
distances_data.data(),
neighbors_data.data(),
metric);

recall_eval_with_filter(query_data.data(),
index_data.data(),
filter_h,
filter_type,
neighbors_data.data(),
distances_data.data(),
distances_ref_h,
neighbors_ref_h,
n_queries,
n_rows,
n_samples,
n_dim,
n_neighbors,
nnz,
metric);
}
// Runs the shared filtered-search test with a BITMAP prefilter.
TEST(BruteForceC, BuildSearchWithBitmapFilter)
{
  constexpr int64_t kNumSamples    = 8096;
  constexpr int64_t kNumQueries    = 128;
  constexpr int64_t kNumDims       = 32;
  constexpr uint32_t kNumNeighbors = 8;

  run_test_with_filter(kNumSamples, kNumQueries, kNumDims, kNumNeighbors, BITMAP);
}

// Runs the shared filtered-search test with a BITSET prefilter.
TEST(BruteForceC, BuildSearchWithBitsetFilter)
{
  constexpr int64_t kNumSamples    = 2000;
  constexpr int64_t kNumQueries    = 100;
  constexpr int64_t kNumDims       = 128;
  constexpr uint32_t kNumNeighbors = 100;

  run_test_with_filter(kNumSamples, kNumQueries, kNumDims, kNumNeighbors, BITSET);
}
14 changes: 9 additions & 5 deletions cpp/tests/neighbors/run_brute_force_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void run_brute_force(int64_t n_rows,
float* index_data,
float* query_data,
uint32_t* prefilter_data,
enum cuvsFilterType prefilter_type,
float* distances_data,
int64_t* neighbors_data,
cuvsDistanceType metric)
Expand Down Expand Up @@ -90,7 +91,7 @@ void run_brute_force(int64_t n_rows,
cuvsFilter prefilter;

DLManagedTensor prefilter_tensor;
if (prefilter_data == NULL) {
if (prefilter_data == NULL || prefilter_type == NO_FILTER) {
prefilter.type = NO_FILTER;
prefilter.addr = (uintptr_t)NULL;
} else {
Expand All @@ -100,11 +101,14 @@ void run_brute_force(int64_t n_rows,
prefilter_tensor.dl_tensor.dtype.code = kDLUInt;
prefilter_tensor.dl_tensor.dtype.bits = 32;
prefilter_tensor.dl_tensor.dtype.lanes = 1;
int64_t prefilter_shape[1] = {(n_queries * n_rows + 31) / 32};
prefilter_tensor.dl_tensor.shape = prefilter_shape;
prefilter_tensor.dl_tensor.strides = NULL;

prefilter.type = BITMAP;
int64_t prefilter_bits_num = (prefilter_type == BITMAP) ? n_queries * n_rows : n_rows;
int64_t prefilter_shape[1] = {(prefilter_bits_num + 31) / 32};

prefilter_tensor.dl_tensor.shape = prefilter_shape;
prefilter_tensor.dl_tensor.strides = NULL;

prefilter.type = prefilter_type;
prefilter.addr = (uintptr_t)&prefilter_tensor;
}

Expand Down

0 comments on commit 0289b0a

Please sign in to comment.