Skip to content

Commit

Permalink
Final FLAT changes
Browse files: browse the repository at this point in the history
  • Loading branch information
cainamisir committed Jul 15, 2024
1 parent 4417a28 commit face742
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 54 deletions.
4 changes: 4 additions & 0 deletions apis/python/src/tiledb/vector_search/index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -318,6 +318,7 @@ def query(
int(os.cpu_count() / 2),
self.update_array_timestamp,
self.config,
self.distance_metric,
)
if self.query_base_array:
internal_results_d, internal_results_i = self.query_internal(
Expand Down Expand Up @@ -695,7 +696,9 @@ def _query_additions(
nthreads=8,
timestamp=None,
config=None,
distance_metric=vspy.DistanceMetric.L2,
):
print("Distance Metric: ", distance_metric)
additions_vectors, additions_external_ids, updated_ids = Index._read_additions(
updates_array_uri, timestamp, config
)
Expand All @@ -709,6 +712,7 @@ def _query_additions(
StdVector_u64(additions_external_ids),
k,
nthreads,
distance_metric,
)
return np.transpose(np.array(d)), np.transpose(np.array(i)), updated_ids

Expand Down
270 changes: 222 additions & 48 deletions apis/python/src/tiledb/vector_search/module.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -165,16 +165,44 @@ static void declare_qv_query_heap_infinite_ram(
std::vector<Id_Type>& ids,
size_t nprobe,
size_t k_nn,
size_t nthreads) -> py::tuple {
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2) -> py::tuple {
auto mat = ColMajorPartitionedMatrixWrapper<T, Id_Type, Id_Type>(
parts, ids, indices);

auto top_centroids = detail::ivf::ivf_top_centroids(
centroids, query_vectors, nprobe, nthreads);
auto r = detail::ivf::qv_query_heap_infinite_ram(
top_centroids, mat, query_vectors, nprobe, k_nn, nthreads);

return make_python_pair(std::move(r));
if (distance_metric == DistanceMetric::L2) {
auto r = detail::ivf::qv_query_heap_infinite_ram(
top_centroids,
mat,
query_vectors,
nprobe,
k_nn,
nthreads,
sum_of_squares_distance{});
return make_python_pair(std::move(r));
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::ivf::qv_query_heap_infinite_ram(
top_centroids,
mat,
query_vectors,
nprobe,
k_nn,
nthreads,
inner_product_distance{});
return make_python_pair(std::move(r));
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::ivf::qv_query_heap_infinite_ram(
top_centroids,
mat,
query_vectors,
nprobe,
k_nn,
nthreads,
cosine_distance{});
return make_python_pair(std::move(r));
}
},
py::keep_alive<1, 2>());
}
Expand All @@ -195,24 +223,70 @@ static void declare_qv_query_heap_finite_ram(
size_t k_nn,
size_t upper_bound,
size_t nthreads,
uint64_t timestamp)
uint64_t timestamp,
DistanceMetric distance_metric = DistanceMetric::L2)
-> py::tuple { // std::tuple<ColMajorMatrix<float>,
// ColMajorMatrix<size_t>> { //
// TODO change return type
auto r = detail::ivf::qv_query_heap_finite_ram<T, Id_Type>(
ctx,
parts_uri,
centroids,
query_vectors,
indices,
ids_uri,
nprobe,
k_nn,
upper_bound,
nthreads,
timestamp);
// auto r = detail::ivf::qv_query_heap_finite_ram<T, Id_Type>(
// ctx,
// parts_uri,
// centroids,
// query_vectors,
// indices,
// ids_uri,
// nprobe,
// k_nn,
// upper_bound,
// nthreads,
// timestamp);

return make_python_pair(std::move(r));
if (distance_metric == DistanceMetric::L2) {
auto r = detail::ivf::qv_query_heap_finite_ram<T, Id_Type>(
ctx,
parts_uri,
centroids,
query_vectors,
indices,
ids_uri,
nprobe,
k_nn,
upper_bound,
nthreads,
timestamp,
sum_of_squares_distance{});
return make_python_pair(std::move(r));
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::ivf::qv_query_heap_finite_ram<T, Id_Type>(
ctx,
parts_uri,
centroids,
query_vectors,
indices,
ids_uri,
nprobe,
k_nn,
upper_bound,
nthreads,
timestamp,
inner_product_distance{});
return make_python_pair(std::move(r));
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::ivf::qv_query_heap_finite_ram<T, Id_Type>(
ctx,
parts_uri,
centroids,
query_vectors,
indices,
ids_uri,
nprobe,
k_nn,
upper_bound,
nthreads,
timestamp,
cosine_distance{});
return make_python_pair(std::move(r));
}
},
py::keep_alive<1, 2>());
}
Expand All @@ -229,7 +303,8 @@ static void declare_nuv_query_heap_infinite_ram(
std::vector<Id_Type>& ids,
size_t nprobe,
size_t k_nn,
size_t nthreads)
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<
ColMajorMatrix<float>,
ColMajorMatrix<uint64_t>> { // TODO change return type
Expand All @@ -240,14 +315,37 @@ static void declare_nuv_query_heap_infinite_ram(
detail::ivf::partition_ivf_flat_index<Id_Type>(
centroids, query_vectors, nprobe, nthreads);

auto r = detail::ivf::nuv_query_heap_infinite_ram(
mat,
active_partitions,
query_vectors,
active_queries,
k_nn,
nthreads);
return r;
if (distance_metric == DistanceMetric::L2) {
auto r = detail::ivf::nuv_query_heap_infinite_ram(
mat,
active_partitions,
query_vectors,
active_queries,
k_nn,
nthreads,
sum_of_squares_distance{});
return r;
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::ivf::nuv_query_heap_infinite_ram(
mat,
active_partitions,
query_vectors,
active_queries,
k_nn,
nthreads,
inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::ivf::nuv_query_heap_infinite_ram(
mat,
active_partitions,
query_vectors,
active_queries,
k_nn,
nthreads,
cosine_distance{});
return r;
}
},
py::keep_alive<1, 2>());
}
Expand All @@ -267,7 +365,8 @@ static void declare_nuv_query_heap_finite_ram(
size_t k_nn,
size_t upper_bound,
size_t nthreads,
uint64_t timestamp)
uint64_t timestamp,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<
ColMajorMatrix<float>,
ColMajorMatrix<uint64_t>> { // TODO change return type
Expand All @@ -288,10 +387,37 @@ static void declare_nuv_query_heap_finite_ram(
upper_bound,
temporal_policy);

auto r = detail::ivf::nuv_query_heap_finite_ram_reg_blocked(
mat, query_vectors, active_queries, k_nn, upper_bound, nthreads);

return r;
if (distance_metric == DistanceMetric::L2) {
auto r = detail::ivf::nuv_query_heap_finite_ram_reg_blocked(
mat,
query_vectors,
active_queries,
k_nn,
upper_bound,
nthreads,
sum_of_squares_distance{});
return r;
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::ivf::nuv_query_heap_finite_ram_reg_blocked(
mat,
query_vectors,
active_queries,
k_nn,
upper_bound,
nthreads,
inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::ivf::nuv_query_heap_finite_ram_reg_blocked(
mat,
query_vectors,
active_queries,
k_nn,
upper_bound,
nthreads,
cosine_distance{});
return r;
}
},
py::keep_alive<1, 2>());
}
Expand Down Expand Up @@ -576,6 +702,7 @@ static void declare_vq_query_heap(py::module& m, const std::string& suffix) {
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
fprintf(stderr, "distance metric: %d\n", distance_metric);
if (distance_metric == DistanceMetric::L2) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, ids, k, nthreads, sum_of_squares_distance{});
Expand All @@ -585,11 +712,10 @@ static void declare_vq_query_heap(py::module& m, const std::string& suffix) {
data, query_vectors, ids, k, nthreads, inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
printf("cosine\n");
auto r = detail::flat::vq_query_heap(
data, query_vectors, ids, k, nthreads, cosine_distance{});
return r;
} else {
throw std::runtime_error("Invalid distance metric");
}
});
}
Expand All @@ -603,11 +729,23 @@ static void declare_vq_query_heap_pyarray(
ColMajorMatrix<float>& query_vectors,
const std::vector<uint64_t>& ids,
int k,
size_t nthreads)
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
auto r =
detail::flat::vq_query_heap(data, query_vectors, ids, k, nthreads);
return r;
fprintf(stderr, "distance metric: %d\n", distance_metric);
if (distance_metric == DistanceMetric::L2) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, ids, k, nthreads, sum_of_squares_distance{});
return r;
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, ids, k, nthreads, inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, ids, k, nthreads, cosine_distance{});
return r;
}
});
}

Expand Down Expand Up @@ -727,32 +865,68 @@ PYBIND11_MODULE(_tiledbvspy, m) {
[](ColMajorMatrix<float>& data,
ColMajorMatrix<float>& query_vectors,
int k,
size_t nthreads)
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
return r;
if (distance_metric == DistanceMetric::L2) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, sum_of_squares_distance{});
return r;
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, cosine_distance{});
return r;
}
});

m.def(
"query_vq_u8",
[](tdbColMajorMatrix<uint8_t>& data,
ColMajorMatrix<float>& query_vectors,
int k,
size_t nthreads)
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
return r;
if (distance_metric == DistanceMetric::L2) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, sum_of_squares_distance{});
return r;
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, cosine_distance{});
return r;
}
});

m.def(
"query_vq_i8",
[](tdbColMajorMatrix<int8_t>& data,
ColMajorMatrix<float>& query_vectors,
int k,
size_t nthreads)
size_t nthreads,
DistanceMetric distance_metric = DistanceMetric::L2)
-> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
return r;
if (distance_metric == DistanceMetric::L2) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, sum_of_squares_distance{});
return r;
} else if (distance_metric == DistanceMetric::INNER_PRODUCT) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, inner_product_distance{});
return r;
} else if (distance_metric == DistanceMetric::COSINE) {
auto r = detail::flat::vq_query_heap(
data, query_vectors, k, nthreads, cosine_distance{});
return r;
}
});

m.def(
Expand Down
Loading

0 comments on commit face742

Please sign in to comment.