Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IVF_PQ re-ranking #502

Merged
merged 32 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d4abded
initial re-ranking work
jparismorgan Aug 22, 2024
4965e4e
working
jparismorgan Aug 22, 2024
c70889c
fixed_min_heap unit tests
jparismorgan Aug 23, 2024
8a9965f
fix python bindings
jparismorgan Aug 23, 2024
9b4ecb9
single fixed_min_heap implementation for both two and three element t…
jparismorgan Aug 23, 2024
b2c864b
Go back to old heap
jparismorgan Aug 26, 2024
7c9436a
Update unit test for old heap as well
jparismorgan Aug 26, 2024
ba73beb
back to separate triplet and double heaps
jparismorgan Aug 26, 2024
5e0a68c
working without feature vectors URI
jparismorgan Aug 26, 2024
435aa19
infinite case working
jparismorgan Aug 26, 2024
1c32724
Merge branch 'main' of https://github.com/TileDB-Inc/TileDB-Vector-Se…
jparismorgan Aug 26, 2024
5a507c3
farther but need finite to load feature_vectors with tdb_partition in…
jparismorgan Aug 27, 2024
f39a6d3
infinite fixes
jparismorgan Aug 28, 2024
533854e
finite working with upper_bound=0
jparismorgan Aug 28, 2024
0dec5ce
finite working with any upper_bound
jparismorgan Aug 28, 2024
89e99a4
more tests, cleanup code
jparismorgan Aug 28, 2024
a78cac2
fix build and cleanup code
jparismorgan Aug 28, 2024
47f8685
Merge branch 'main' of https://github.com/TileDB-Inc/TileDB-Vector-Se…
jparismorgan Aug 28, 2024
3ce1f13
cleanup code
jparismorgan Aug 28, 2024
4360853
cleanup more code
jparismorgan Aug 28, 2024
d845202
fix SUM_OF_SQUARES, fix consolidation, fix test error threshold
jparismorgan Sep 2, 2024
9879423
change template back
jparismorgan Sep 3, 2024
7174f20
Merge branch 'main' of https://github.com/TileDB-Inc/TileDB-Vector-Se…
jparismorgan Sep 3, 2024
b31a00a
Merge branch 'main' of https://github.com/TileDB-Inc/TileDB-Vector-Se…
jparismorgan Sep 3, 2024
7258753
update local-benchmarks
jparismorgan Sep 4, 2024
5255f4a
update local-benchmarks
jparismorgan Sep 4, 2024
57bfac0
Merge branch 'main' of https://github.com/TileDB-Inc/TileDB-Vector-Se…
jparismorgan Sep 4, 2024
53d31e1
enable tests
jparismorgan Sep 4, 2024
91e816f
fix build
jparismorgan Sep 4, 2024
8390ef6
cleanup code
jparismorgan Sep 5, 2024
29751b1
speed up local_index_to_global(), cache its result in apply_query()
jparismorgan Sep 5, 2024
00eb882
pr feedback
jparismorgan Sep 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions apis/python/src/tiledb/vector_search/ivf_pq_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def query_internal(
self,
queries: np.ndarray,
k: int = 10,
k_factor: float = 1.0,
nprobe: Optional[int] = 100,
**kwargs,
):
Expand All @@ -101,6 +102,13 @@ def query_internal(
2D array of query vectors. This can be used as a batch query interface by passing multiple queries in one call.
k: int
Number of results to return per query vector.
k_factor: float
To improve accuracy, IVF_PQ can search for more vectors than requested and then
perform re-ranking using the original non-PQ-encoded vectors. This can be slightly
slower, but is more accurate. k_factor is the factor by which to increase the number
of vectors searched. 1 means we search for exactly `k` vectors. 10 means we search for
`10*k` vectors.
Defaults to 1.
nprobe: int
Number of partitions to check per query.
Use this parameter to trade-off accuracy for latency and cost.
Expand All @@ -120,11 +128,11 @@ def query_internal(

if self.memory_budget == -1:
distances, ids = self.index.query_infinite_ram(
queries_feature_vector_array, k, nprobe
queries_feature_vector_array, k, nprobe, k_factor
)
else:
distances, ids = self.index.query_finite_ram(
queries_feature_vector_array, k, nprobe, self.memory_budget
queries_feature_vector_array, k, nprobe, self.memory_budget, k_factor
)

return np.array(distances, copy=False), np.array(ids, copy=False)
Expand Down
23 changes: 16 additions & 7 deletions apis/python/src/tiledb/vector_search/type_erased_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -498,29 +498,38 @@ void init_type_erased_module(py::module_& m) {
[](IndexIVFPQ& index,
const FeatureVectorArray& vectors,
size_t top_k,
size_t nprobe) {
auto r =
index.query(QueryType::InfiniteRAM, vectors, top_k, nprobe);
size_t nprobe,
float k_factor) {
auto r = index.query(
QueryType::InfiniteRAM, vectors, top_k, nprobe, 0, k_factor);
return make_python_pair(std::move(r));
},
py::arg("vectors"),
py::arg("top_k"),
py::arg("nprobe"))
py::arg("nprobe"),
py::arg("k_factor") = 1.f)
.def(
"query_finite_ram",
[](IndexIVFPQ& index,
const FeatureVectorArray& vectors,
size_t top_k,
size_t nprobe,
size_t memory_budget) {
size_t memory_budget,
float k_factor) {
auto r = index.query(
QueryType::FiniteRAM, vectors, top_k, nprobe, memory_budget);
QueryType::FiniteRAM,
vectors,
top_k,
nprobe,
memory_budget,
k_factor);
return make_python_pair(std::move(r));
},
py::arg("vectors"),
py::arg("top_k"),
py::arg("nprobe"),
py::arg("memory_budget"))
py::arg("memory_budget"),
py::arg("k_factor") = 1.f)
.def(
"write_index",
[](IndexIVFPQ& index,
Expand Down
27 changes: 27 additions & 0 deletions apis/python/test/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,9 @@ def test_ivf_pq_index(tmp_path):
assert distances[0][0] == MAX_FLOAT32
assert ids[0][0] == MAX_UINT64
query_and_check_distances(index, queries, 1, [[MAX_FLOAT32]], [[MAX_UINT64]])
query_and_check_distances(
index, queries, 1, [[MAX_FLOAT32]], [[MAX_UINT64]], k_factor=2.0
)
check_default_metadata(uri, vector_type, STORAGE_VERSION, "IVF_PQ")

update_vectors = np.empty([5], dtype=object)
Expand All @@ -344,6 +347,14 @@ def test_ivf_pq_index(tmp_path):
query_and_check_distances(
index, np.array([[2, 2, 2]], dtype=np.float32), 2, [[0, 3]], [[2, 1]]
)
query_and_check_distances(
index,
np.array([[2, 2, 2]], dtype=np.float32),
2,
[[0, 3]],
[[2, 1]],
k_factor=2.0,
NikolaosPapailiou marked this conversation as resolved.
Show resolved Hide resolved
)

index = index.consolidate_updates()

Expand All @@ -366,6 +377,14 @@ def test_ivf_pq_index(tmp_path):
[[0], [0]],
[[i], [i]],
)
query_and_check_distances(
index,
np.array([[i, i, i], [i, i, i]], dtype=np.float32),
1,
[[0], [0]],
[[i], [i]],
k_factor=2.0,
)

# Test that we can query with k > 1.
query_and_check_distances(
Expand All @@ -380,6 +399,14 @@ def test_ivf_pq_index(tmp_path):
[[0, 3], [0, 3]],
[[0, 1], [4, 3]],
)
query_and_check_distances(
index,
np.array([[0, 0, 0], [4, 4, 4]], dtype=np.float32),
2,
[[0, 3], [0, 3]],
[[0, 1], [4, 3]],
k_factor=2.0,
)

vfs = tiledb.VFS()
assert vfs.dir_size(uri) > 0
Expand Down
12 changes: 10 additions & 2 deletions apis/python/test/test_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,17 @@ def test_ivf_pq_ingestion_u8(tmp_path):
queries,
k=k,
nprobe=nprobe,
use_nuv_implementation=True,
)
assert accuracy(result, gt_i) > MINIMUM_ACCURACY
query_accuracy = accuracy(result, gt_i)
assert query_accuracy > MINIMUM_ACCURACY

_, result = index_ram.query(
queries,
k=k,
k_factor=2,
nprobe=nprobe,
)
assert accuracy(result, gt_i) > query_accuracy + 0.1

_, result = index_ram.query(
queries,
Expand Down
8 changes: 4 additions & 4 deletions src/include/api/ivf_flat_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ class IndexIVFFlat {
(float*)vectors.data(),
extents(vectors)[0],
extents(vectors)[1]}; // @todo ??
auto [s, t] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
auto [s, t, _] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
auto x = FeatureVectorArray{std::move(s)};
auto y = FeatureVectorArray{std::move(t)};
return {std::move(x), std::move(y)};
Expand All @@ -624,7 +624,7 @@ class IndexIVFFlat {
(uint8_t*)vectors.data(),
extents(vectors)[0],
extents(vectors)[1]}; // @todo ??
auto [s, t] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
auto [s, t, _] = impl_index_.query_infinite_ram(qspan, k_nn, nprobe);
auto x = FeatureVectorArray{std::move(s)};
auto y = FeatureVectorArray{std::move(t)};
return {std::move(x), std::move(y)};
Expand All @@ -651,7 +651,7 @@ class IndexIVFFlat {
(float*)vectors.data(),
extents(vectors)[0],
extents(vectors)[1]}; // @todo ??
auto [s, t] =
auto [s, t, _] =
impl_index_.query_finite_ram(qspan, k_nn, nprobe, upper_bound);
auto x = FeatureVectorArray{std::move(s)};
auto y = FeatureVectorArray{std::move(t)};
Expand All @@ -662,7 +662,7 @@ class IndexIVFFlat {
(uint8_t*)vectors.data(),
extents(vectors)[0],
extents(vectors)[1]}; // @todo ??
auto [s, t] =
auto [s, t, _] =
impl_index_.query_finite_ram(qspan, k_nn, nprobe, upper_bound);
auto x = FeatureVectorArray{std::move(s)};
auto y = FeatureVectorArray{std::move(t)};
Expand Down
20 changes: 12 additions & 8 deletions src/include/api/ivf_pq_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,13 @@ class IndexIVFPQ {
const QueryVectorArray& vectors,
size_t top_k,
size_t nprobe,
size_t upper_bound = 0) {
size_t upper_bound = 0,
float k_factor = 1.f) {
if (!index_) {
throw std::runtime_error("Cannot query() because there is no index.");
}
return index_->query(queryType, vectors, top_k, nprobe, upper_bound);
return index_->query(
queryType, vectors, top_k, nprobe, upper_bound, k_factor);
}

void write_index(
Expand Down Expand Up @@ -398,7 +400,8 @@ class IndexIVFPQ {
const QueryVectorArray& vectors,
size_t top_k,
size_t nprobe,
size_t upper_bound) = 0;
size_t upper_bound,
float k_factor) = 0;

virtual void write_index(
const tiledb::Context& ctx,
Expand Down Expand Up @@ -508,7 +511,8 @@ class IndexIVFPQ {
const QueryVectorArray& vectors,
size_t top_k,
size_t nprobe,
size_t upper_bound) override {
size_t upper_bound,
float k_factor) override {
// @todo using index_type = size_t;
auto dtype = vectors.feature_type();

Expand All @@ -519,8 +523,8 @@ class IndexIVFPQ {
(float*)vectors.data(),
extents(vectors)[0],
extents(vectors)[1]}; // @todo ??
auto [s, t] =
impl_index_.query(queryType, qspan, top_k, nprobe, upper_bound);
auto [s, t] = impl_index_.query(
queryType, qspan, top_k, nprobe, upper_bound, k_factor);
auto x = FeatureVectorArray{std::move(s)};
auto y = FeatureVectorArray{std::move(t)};
return {std::move(x), std::move(y)};
Expand All @@ -530,8 +534,8 @@ class IndexIVFPQ {
(uint8_t*)vectors.data(),
extents(vectors)[0],
extents(vectors)[1]}; // @todo ??
auto [s, t] =
impl_index_.query(queryType, qspan, top_k, nprobe, upper_bound);
auto [s, t] = impl_index_.query(
queryType, qspan, top_k, nprobe, upper_bound, k_factor);
auto x = FeatureVectorArray{std::move(s)};
auto y = FeatureVectorArray{std::move(t)};
return {std::move(x), std::move(y)};
Expand Down
12 changes: 9 additions & 3 deletions src/include/detail/ivf/dist_qv.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,15 @@ auto dist_qv_finite_ram_part(
fixed_min_pair_heap<score_type, shuffled_ids_type>(k_nn));

size_t part_offset = 0;
size_t indices_offset = 0;
while (partitioned_vectors.load()) {
_i.start();
auto current_part_size = ::num_partitions(partitioned_vectors);
size_t parts_per_thread = (current_part_size + nthreads - 1) / nthreads;

std::vector<std::future<decltype(min_scores)>> futs;
std::vector<std::future<std::vector<
fixed_min_triplet_heap<score_type, shuffled_ids_type, size_t>>>>
futs;
futs.reserve(nthreads);

for (size_t n = 0; n < nthreads; ++n) {
Expand All @@ -148,7 +151,8 @@ auto dist_qv_finite_ram_part(
k_nn,
first_part,
last_part,
part_offset]() {
part_offset,
indices_offset]() {
return apply_query(
partitioned_vectors,
std::optional<std::vector<int>>{},
Expand All @@ -159,6 +163,7 @@ auto dist_qv_finite_ram_part(
first_part,
last_part,
part_offset,
indices_offset,
distance);
}));
}
Expand All @@ -167,13 +172,14 @@ auto dist_qv_finite_ram_part(
auto min_n = futs[n].get();

for (size_t j = 0; j < num_queries; ++j) {
for (auto&& [e, f] : min_n[j]) {
for (auto&& [e, f, _] : min_n[j]) {
min_scores[j].insert(e, f);
}
}
}

part_offset += current_part_size;
indices_offset += num_vectors(partitioned_vectors);
_i.stop();
}
return min_scores;
Expand Down
Loading
Loading