-
Notifications
You must be signed in to change notification settings - Fork 87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Feat] Add Support for Index merge
in CAGRA
#618
Changes from 13 commits
ba244fb
1dd5140
890a89e
696c660
318068c
e5067c8
1eb211a
0fb7dff
7af3ad8
c69af18
89d0a47
8b95d7b
7446f0e
e0633fa
690e775
6592d20
afb6026
b49e04a
da45bdb
9d3acb0
ea7991b
ca6c59f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -248,6 +248,17 @@ struct extend_params { | |
* 0. */ | ||
uint32_t max_chunk_size = 0; | ||
}; | ||
/** | ||
* @} | ||
*/ | ||
|
||
/** | ||
* @defgroup cagra_cpp_merge_params CAGRA index merge parameters | ||
* @{ | ||
*/ | ||
/**
 * Parameters accepted by the cagra::merge overloads.
 *
 * Derives publicly from index_params and declares no additional members.
 */
struct merge_params : public index_params { | ||
/**
 * Copy-constructs the index_params base from `params`.
 * The constructor is not marked explicit, so an index_params value converts implicitly.
 */
merge_params(const index_params& params) : index_params(params) {} | ||
}; | ||
|
||
/** | ||
* @} | ||
|
@@ -293,7 +304,7 @@ struct index : cuvs::neighbors::index { | |
return data_rows > 0 ? data_rows : graph_view_.extent(0); | ||
} | ||
|
||
/** Dimensionality of the data. */ | ||
/** Dimensionality of the data, as reported by the index's dataset_ member. */ | ||
[[nodiscard]] constexpr inline auto dim() const noexcept -> uint32_t { return dataset_->dim(); } | ||
/** Graph degree */ | ||
[[nodiscard]] constexpr inline auto graph_degree() const noexcept -> uint32_t | ||
|
@@ -1778,6 +1789,150 @@ void serialize_to_hnswlib( | |
std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset = | ||
std::nullopt); | ||
|
||
/** | ||
* @defgroup cagra_cpp_index_merge CAGRA index merge functions | ||
* @{ | ||
*/ | ||
|
||
/** @brief Merge multiple CAGRA indices into a single index. | ||
* | ||
* This function merges multiple CAGRA indices into one, combining both the datasets and graph | ||
* structures. | ||
* | ||
* @note: When device memory is sufficient, the dataset attached to the returned index is allocated | ||
* in device memory by default; otherwise, host memory is used automatically. | ||
* | ||
* Usage example: | ||
* @code{.cpp} | ||
* using namespace cuvs::neighbors; | ||
* auto dataset0 = raft::make_host_matrix<float, int64_t>(res, size0, dim); | ||
* auto dataset1 = raft::make_host_matrix<float, int64_t>(res, size1, dim); | ||
* | ||
* auto index0 = cagra::build(res, index_params, dataset0); | ||
* auto index1 = cagra::build(res, index_params, dataset1); | ||
* | ||
* std::vector<cagra::index<float, uint32_t>*> indices{&index0, &index1}; | ||
* cagra::merge_params params{index_params}; | ||
* | ||
* auto merged_index = cagra::merge(res, params, indices); | ||
* @endcode | ||
* | ||
* @param[in] res RAFT resources used for the merge operation. | ||
* @param[in] params Parameters that control the merging process. | ||
* @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: | ||
* - Have attached datasets with the same dimension. | ||
* | ||
* @return A new CAGRA index containing the merged indices, graph, and dataset. | ||
*/ | ||
auto merge(raft::resources const& res, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey @chatman, I'm working on |
||
const cuvs::neighbors::cagra::merge_params& params, | ||
std::vector<cuvs::neighbors::cagra::index<float, uint32_t>*>& indices) | ||
-> cuvs::neighbors::cagra::index<float, uint32_t>; | ||
|
||
/** @brief Merge multiple CAGRA indices into a single index. | ||
* | ||
* This function merges multiple CAGRA indices into one, combining both the datasets and graph | ||
* structures. | ||
* | ||
* @note: When device memory is sufficient, the dataset attached to the returned index is allocated | ||
* in device memory by default; otherwise, host memory is used automatically. | ||
* | ||
* Usage example: | ||
* @code{.cpp} | ||
* using namespace cuvs::neighbors; | ||
* auto dataset0 = raft::make_host_matrix<half, int64_t>(res, size0, dim); | ||
* auto dataset1 = raft::make_host_matrix<half, int64_t>(res, size1, dim); | ||
* | ||
* auto index0 = cagra::build(res, index_params, dataset0); | ||
* auto index1 = cagra::build(res, index_params, dataset1); | ||
* | ||
* std::vector<cagra::index<half, uint32_t>*> indices{&index0, &index1}; | ||
* cagra::merge_params params{index_params}; | ||
* | ||
* auto merged_index = cagra::merge(res, params, indices); | ||
* @endcode | ||
* | ||
* @param[in] res RAFT resources used for the merge operation. | ||
* @param[in] params Parameters that control the merging process. | ||
* @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: | ||
* - Have attached datasets with the same dimension. | ||
* | ||
* @return A new CAGRA index containing the merged indices, graph, and dataset. | ||
*/ | ||
auto merge(raft::resources const& res, | ||
const cuvs::neighbors::cagra::merge_params& params, | ||
std::vector<cuvs::neighbors::cagra::index<half, uint32_t>*>& indices) | ||
-> cuvs::neighbors::cagra::index<half, uint32_t>; | ||
|
||
/** @brief Merge multiple CAGRA indices into a single index. | ||
* | ||
* This function merges multiple CAGRA indices into one, combining both the datasets and graph | ||
* structures. | ||
* | ||
* @note: When device memory is sufficient, the dataset attached to the returned index is allocated | ||
* in device memory by default; otherwise, host memory is used automatically. | ||
* | ||
* Usage example: | ||
* @code{.cpp} | ||
* using namespace cuvs::neighbors; | ||
* auto dataset0 = raft::make_host_matrix<int8_t, int64_t>(res, size0, dim); | ||
* auto dataset1 = raft::make_host_matrix<int8_t, int64_t>(res, size1, dim); | ||
* | ||
* auto index0 = cagra::build(res, index_params, dataset0); | ||
* auto index1 = cagra::build(res, index_params, dataset1); | ||
* | ||
* std::vector<cagra::index<int8_t, uint32_t>*> indices{&index0, &index1}; | ||
* cagra::merge_params params{index_params}; | ||
* | ||
* auto merged_index = cagra::merge(res, params, indices); | ||
* @endcode | ||
* | ||
* @param[in] res RAFT resources used for the merge operation. | ||
* @param[in] params Parameters that control the merging process. | ||
* @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: | ||
* - Have attached datasets with the same dimension. | ||
* | ||
* @return A new CAGRA index containing the merged indices, graph, and dataset. | ||
*/ | ||
auto merge(raft::resources const& res, | ||
const cuvs::neighbors::cagra::merge_params& params, | ||
std::vector<cuvs::neighbors::cagra::index<int8_t, uint32_t>*>& indices) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do agree with Artem that the vector of pointers is not the prettiest thing, but I don't think variadic templates are the way to fix that (and they overall make things very challenging to work with). I think we can stick with pointers for now and update the API later if needed. Initially, this will be needed for Lucene, which will use it through our Java API so at least this public API is localized at the moment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pointers are fine, from the perspective of the Java API. We can work best with memory addresses, since we'll be mmap'ing the index data from files on disk. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
-> cuvs::neighbors::cagra::index<int8_t, uint32_t>; | ||
|
||
/** @brief Merge multiple CAGRA indices into a single index. | ||
* | ||
* This function merges multiple CAGRA indices into one, combining both the datasets and graph | ||
* structures. | ||
* | ||
* @note: When device memory is sufficient, the dataset attached to the returned index is allocated | ||
* in device memory by default; otherwise, host memory is used automatically. | ||
* | ||
* Usage example: | ||
* @code{.cpp} | ||
* using namespace cuvs::neighbors; | ||
* auto dataset0 = raft::make_host_matrix<uint8_t, int64_t>(res, size0, dim); | ||
* auto dataset1 = raft::make_host_matrix<uint8_t, int64_t>(res, size1, dim); | ||
* | ||
* auto index0 = cagra::build(res, index_params, dataset0); | ||
* auto index1 = cagra::build(res, index_params, dataset1); | ||
* | ||
* std::vector<cagra::index<uint8_t, uint32_t>*> indices{&index0, &index1}; | ||
* cagra::merge_params params{index_params}; | ||
* | ||
* auto merged_index = cagra::merge(res, params, indices); | ||
* @endcode | ||
* | ||
* @param[in] res RAFT resources used for the merge operation. | ||
* @param[in] params Parameters that control the merging process. | ||
* @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: | ||
* - Have attached datasets with the same dimension. | ||
* | ||
* @return A new CAGRA index containing the merged indices, graph, and dataset. | ||
*/ | ||
auto merge(raft::resources const& res, | ||
const cuvs::neighbors::cagra::merge_params& params, | ||
std::vector<cuvs::neighbors::cagra::index<uint8_t, uint32_t>*>& indices) | ||
-> cuvs::neighbors::cagra::index<uint8_t, uint32_t>; | ||
/** | ||
* @} | ||
*/ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright (c) 2025, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "cagra.cuh" | ||
#include <cuvs/neighbors/cagra.hpp> | ||
|
||
namespace cuvs::neighbors::cagra { | ||
|
||
/**
 * Expands to the definition of a non-template `merge` overload for element type `T` and
 * index type `IdxT`.
 *
 * The generated function passes `handle`, `params` and `indices` unchanged to the templated
 * `cuvs::neighbors::cagra::merge<T, IdxT>` and returns its result.
 * NOTE(review): the template is presumably provided by "cagra.cuh" - confirm.
 */
#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ |
auto merge(raft::resources const& handle, \ |
const cuvs::neighbors::cagra::merge_params& params, \ |
std::vector<cuvs::neighbors::cagra::index<T, IdxT>*>& indices) \ |
->cuvs::neighbors::cagra::index<T, IdxT> \ |
{ \ |
return cuvs::neighbors::cagra::merge<T, IdxT>(handle, params, indices); \ |
} |
|
||
// Define the overload for float data with uint32_t indices.
RAFT_INST_CAGRA_MERGE(float, uint32_t); |
|
||
// Remove the helper macro so it is not visible past this point.
#undef RAFT_INST_CAGRA_MERGE |
|
||
} // namespace cuvs::neighbors::cagra |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright (c) 2025, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "cagra.cuh" | ||
#include <cuvs/neighbors/cagra.hpp> | ||
|
||
namespace cuvs::neighbors::cagra { | ||
|
||
/**
 * Expands to the definition of a non-template `merge` overload for element type `T` and
 * index type `IdxT`.
 *
 * The generated function passes `handle`, `params` and `indices` unchanged to the templated
 * `cuvs::neighbors::cagra::merge<T, IdxT>` and returns its result.
 * NOTE(review): the template is presumably provided by "cagra.cuh" - confirm.
 */
#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ |
auto merge(raft::resources const& handle, \ |
const cuvs::neighbors::cagra::merge_params& params, \ |
std::vector<cuvs::neighbors::cagra::index<T, IdxT>*>& indices) \ |
->cuvs::neighbors::cagra::index<T, IdxT> \ |
{ \ |
return cuvs::neighbors::cagra::merge<T, IdxT>(handle, params, indices); \ |
} |
|
||
// Define the overload for half data with uint32_t indices.
RAFT_INST_CAGRA_MERGE(half, uint32_t); |
|
||
// Remove the helper macro so it is not visible past this point.
#undef RAFT_INST_CAGRA_MERGE |
|
||
} // namespace cuvs::neighbors::cagra |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright (c) 2025, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "cagra.cuh" | ||
#include <cuvs/neighbors/cagra.hpp> | ||
|
||
namespace cuvs::neighbors::cagra { | ||
|
||
/**
 * Expands to the definition of a non-template `merge` overload for element type `T` and
 * index type `IdxT`.
 *
 * The generated function passes `handle`, `params` and `indices` unchanged to the templated
 * `cuvs::neighbors::cagra::merge<T, IdxT>` and returns its result.
 * NOTE(review): the template is presumably provided by "cagra.cuh" - confirm.
 */
#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ |
auto merge(raft::resources const& handle, \ |
const cuvs::neighbors::cagra::merge_params& params, \ |
std::vector<cuvs::neighbors::cagra::index<T, IdxT>*>& indices) \ |
->cuvs::neighbors::cagra::index<T, IdxT> \ |
{ \ |
return cuvs::neighbors::cagra::merge<T, IdxT>(handle, params, indices); \ |
} |
|
||
// Define the overload for int8_t data with uint32_t indices.
RAFT_INST_CAGRA_MERGE(int8_t, uint32_t); |
|
||
// Remove the helper macro so it is not visible past this point.
#undef RAFT_INST_CAGRA_MERGE |
|
||
} // namespace cuvs::neighbors::cagra |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright (c) 2025, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "cagra.cuh" | ||
#include <cuvs/neighbors/cagra.hpp> | ||
|
||
namespace cuvs::neighbors::cagra { | ||
|
||
/**
 * Expands to the definition of a non-template `merge` overload for element type `T` and
 * index type `IdxT`.
 *
 * The generated function passes `handle`, `params` and `indices` unchanged to the templated
 * `cuvs::neighbors::cagra::merge<T, IdxT>` and returns its result.
 * NOTE(review): the template is presumably provided by "cagra.cuh" - confirm.
 */
#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ |
auto merge(raft::resources const& handle, \ |
const cuvs::neighbors::cagra::merge_params& params, \ |
std::vector<cuvs::neighbors::cagra::index<T, IdxT>*>& indices) \ |
->cuvs::neighbors::cagra::index<T, IdxT> \ |
{ \ |
return cuvs::neighbors::cagra::merge<T, IdxT>(handle, params, indices); \ |
} |
|
||
// Define the overload for uint8_t data with uint32_t indices.
RAFT_INST_CAGRA_MERGE(uint8_t, uint32_t); |
|
||
// Remove the helper macro so it is not visible past this point.
#undef RAFT_INST_CAGRA_MERGE |
|
||
} // namespace cuvs::neighbors::cagra |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change seems a little unnecessary.
Dimensionality
seems like the right word (and case). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Revert it back