Skip to content

Commit d551716

Browse files
authored
Allow the user to specify an index name for dense vector search, or to ignore indexes when searching dense vectors (#1643)
### What problem does this PR solve? - Use 'ignore index' to perform a brute-force search. - Use the index name given by 'index_name' to search a dense vector. ### Type of change - [x] New Feature (non-breaking change which adds functionality) - [x] Test cases - [x] Python SDK impacted, Need to update PyPI Signed-off-by: Jin Hai <haijin.chn@gmail.com>
1 parent 24ec1eb commit d551716

24 files changed

+2814
-2444
lines changed

python/benchmark/fulltext_import_benchmark.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def insert_data(db_obj, data):
2828
table_obj = db_obj.create_table("insert_benchmark",
2929
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
3030
"text": {"type": "varchar"}}, ConflictType.Error)
31-
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
31+
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
3232
assert res.error_code == ErrorCode.OK
3333
inserted_records_num = 0
3434
while inserted_records_num < len(data):
@@ -50,7 +50,7 @@ def import_file(db_obj, path):
5050
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
5151
"text": {"type": "varchar"}}, ConflictType.Error)
5252
assert table_obj
53-
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
53+
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
5454
assert res.error_code == ErrorCode.OK
5555
table_obj.import_data(path, {'file_type': 'jsonl'})
5656

python/benchmark/legacy_benchmark/fulltext_import_benchmark.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def insert_data(db_obj, data):
2828
table_obj = db_obj.create_table("insert_benchmark",
2929
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
3030
"text": {"type": "varchar"}}, ConflictType.Error)
31-
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
31+
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
3232
assert res.error_code == ErrorCode.OK
3333
inserted_records_num = 0
3434
while inserted_records_num < len(data):
@@ -50,7 +50,7 @@ def import_file(db_obj, path):
5050
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
5151
"text": {"type": "varchar"}}, ConflictType.Error)
5252
assert table_obj
53-
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
53+
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
5454
assert res.error_code == ErrorCode.OK
5555
table_obj.import_data(path, {'file_type': 'jsonl'})
5656

python/benchmark/legacy_benchmark/remote_benchmark_knn.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def one_thread(rounds, query_path, ground_truth_path, ef: int, remote: bool, tab
203203

204204
query_builder = InfinityThriftQueryBuilder(table)
205205
query_builder.output(["_row_id"])
206-
query_builder.knn('col1', query_vec, 'float', 'l2', 100, {'ef': str(ef)})
206+
query_builder.knn('col1', query_vec, 'float', 'l2', 100, {'index_name': 'hnsw_index', 'ef': str(ef)})
207207
res, _ = query_builder.to_result()
208208
end = time.time()
209209

python/benchmark/legacy_benchmark/remote_benchmark_knn_import.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,13 @@ def create_index(table_obj, m: int, ef_construction: int, remote: bool):
106106
index.IndexInfo(
107107
"col1",
108108
index.IndexType.Hnsw,
109-
[
110-
index.InitParameter("M", str(m)),
111-
index.InitParameter("ef_construction", str(ef_construction)),
112-
index.InitParameter("ef", str(ef_construction)),
113-
index.InitParameter("metric", "l2"),
114-
index.InitParameter("encode", "lvq"),
115-
],
109+
{
110+
"m": str(m),
111+
"ef_construction": str(ef_construction),
112+
"ef": str(ef_construction),
113+
"metric": "l2",
114+
"encode": "lvq"
115+
},
116116
)
117117
)
118118

python/benchmark/legacy_benchmark/remote_benchmark_knn_insert.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,13 @@ def create_index(table_name):
125125
res = table.create_index("hnsw_index",
126126
index.IndexInfo("col1",
127127
index.IndexType.Hnsw,
128-
[
129-
index.InitParameter("M", "16"),
130-
index.InitParameter("ef_construction", "200"),
131-
index.InitParameter("ef", "200"),
132-
index.InitParameter("metric", "l2"),
133-
index.InitParameter("encode", "lvq")
134-
]))
128+
{
129+
"m": "16",
130+
"ef_construction": "200",
131+
"ef": "200",
132+
"metric": "l2",
133+
"encode": "lvq"
134+
}))
135135

136136
assert res.error_code == ErrorCode.OK
137137

python/benchmark/mldr_benchmark/insert_data.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,22 @@ def main(self):
9797
print("Finish creating fulltext index.")
9898
print("Start creating Hnsw index...")
9999
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
100-
[index.InitParameter("M", "16"),
101-
index.InitParameter("ef_construction",
102-
"200"),
103-
index.InitParameter("ef", "200"),
104-
index.InitParameter("metric", "ip"),
105-
index.InitParameter("encode", "lvq")]),
100+
{
101+
"m": "16",
102+
"ef_construction": "200",
103+
"ef": "200",
104+
"metric": "ip",
105+
"encode": "lvq"
106+
}),
106107
ConflictType.Error)
107108
assert res.error_code == ErrorCode.OK
108109
print("Finish creating Hnsw index.")
109110
print("Start creating BMP index...")
110111
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
111-
[index.InitParameter("block_size", "8"),
112-
index.InitParameter("compress_type",
113-
"compress")]),
112+
{
113+
"block_size": "8",
114+
"compress_type": "compress"
115+
}),
114116
ConflictType.Error)
115117
assert res.error_code == ErrorCode.OK
116118
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})

python/benchmark/mldr_benchmark/insert_data_50000.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -104,20 +104,22 @@ def main(self):
104104
print("Finish creating fulltext index.")
105105
print("Start creating Hnsw index...")
106106
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
107-
[index.InitParameter("M", "16"),
108-
index.InitParameter("ef_construction",
109-
"1000"),
110-
index.InitParameter("ef", "1000"),
111-
index.InitParameter("metric", "ip"),
112-
index.InitParameter("encode", "lvq")]),
107+
{
108+
"m": "16",
109+
"ef_construction": "1000",
110+
"ef": "1000",
111+
"metric": "ip",
112+
"encode": "lvq"
113+
}),
113114
ConflictType.Error)
114115
assert res.error_code == ErrorCode.OK
115116
print("Finish creating Hnsw index.")
116117
print("Start creating BMP index...")
117118
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
118-
[index.InitParameter("block_size", "8"),
119-
index.InitParameter("compress_type",
120-
"compress")]),
119+
{
120+
"block_size": "8",
121+
"compress_type": "compress",
122+
}),
121123
ConflictType.Error)
122124
assert res.error_code == ErrorCode.OK
123125
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})

python/benchmark/mldr_benchmark/insert_data_with_colbert.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -100,30 +100,32 @@ def main(self):
100100
del docid_list
101101
del corpus_text_list
102102
print("Start creating fulltext index.")
103-
ft_params = []
103+
ft_params = {}
104104
if lang == "zh":
105-
ft_params.append(index.InitParameter("analyzer", "chinese"))
105+
ft_params = {"analyzer": "chinese"}
106106
res = self.infinity_table.create_index("ft_index",
107107
index.IndexInfo("fulltext_col", index.IndexType.FullText, ft_params),
108108
ConflictType.Error)
109109
assert res.error_code == ErrorCode.OK
110110
print("Finish creating fulltext index.")
111111
print("Start creating Hnsw index...")
112112
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
113-
[index.InitParameter("M", "16"),
114-
index.InitParameter("ef_construction",
115-
"200"),
116-
index.InitParameter("ef", "200"),
117-
index.InitParameter("metric", "ip"),
118-
index.InitParameter("encode", "lvq")]),
113+
{
114+
"m": "16",
115+
"ef_construction": "200",
116+
"ef": "200",
117+
"metric": "ip",
118+
"encode": "lvq"
119+
}),
119120
ConflictType.Error)
120121
assert res.error_code == ErrorCode.OK
121122
print("Finish creating Hnsw index.")
122123
print("Start creating BMP index...")
123124
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
124-
[index.InitParameter("block_size", "8"),
125-
index.InitParameter("compress_type",
126-
"compress")]),
125+
{
126+
"block_size": "8",
127+
"compress_type": "compress"
128+
}),
127129
ConflictType.Error)
128130
assert res.error_code == ErrorCode.OK
129131
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})

python/benchmark/mldr_benchmark/insert_data_with_colbert_50000.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -113,20 +113,20 @@ def main(self):
113113
print("Finish creating fulltext index.")
114114
print("Start creating Hnsw index...")
115115
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
116-
[index.InitParameter("M", "16"),
117-
index.InitParameter("ef_construction",
118-
"200"),
119-
index.InitParameter("ef", "200"),
120-
index.InitParameter("metric", "ip"),
121-
index.InitParameter("encode", "lvq")]),
116+
{
117+
"m": "16",
118+
"ef_construction": "200",
119+
"ef": "200",
120+
"metric": "ip",
121+
"encode": "lvq"
122+
}),
122123
ConflictType.Error)
123124
assert res.error_code == ErrorCode.OK
124125
print("Finish creating Hnsw index.")
125126
print("Start creating BMP index...")
126-
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
127-
[index.InitParameter("block_size", "8"),
128-
index.InitParameter("compress_type",
129-
"compress")]),
127+
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,{
128+
"block_size": "8",
129+
"compress_type": "compress"}),
130130
ConflictType.Error)
131131
assert res.error_code == ErrorCode.OK
132132
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})

python/infinity/local_infinity/query_builder.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,9 @@ def knn(
155155
knn_opt_params = []
156156
if knn_params != None:
157157
for k, v in knn_params.items():
158-
knn_opt_params.append(InitParameter(k, v))
158+
key = k.lower()
159+
value = v.lower()
160+
knn_opt_params.append(InitParameter(key, value))
159161

160162
knn_expr = WrapKnnExpr()
161163
knn_expr.column_expr = column_expr

python/infinity/remote_thrift/query_builder.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ def knn(
172172
knn_opt_params = []
173173
if knn_params is not None:
174174
for k, v in knn_params.items():
175-
knn_opt_params.append(InitParameter(k, v))
175+
key = k.lower()
176+
value = v.lower()
177+
knn_opt_params.append(InitParameter(key, value))
176178

177179
knn_expr = KnnExpr(
178180
column_expr=column_expr,

0 commit comments

Comments
 (0)