Skip to content

Commit

Permalink
run queries twice to test for idempotence
Browse files (browse the repository at this point in the history)
  • Loading branch information
jparismorgan committed Dec 11, 2023
1 parent 22577e5 commit 211182e
Showing 1 changed file with 72 additions and 54 deletions.
126 changes: 72 additions & 54 deletions apis/python/test/test_index.py
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,9 @@
def test_flat_index(tmp_path):
uri = os.path.join(tmp_path, "array")
index = flat_index.create(uri=uri, dimensions=3, vector_type=np.dtype(np.uint8))
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {ind.MAX_UINT64} == set(result_i[0])
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {ind.MAX_UINT64} == set(result_i[0])

update_vectors = np.empty([5], dtype=object)
update_vectors[0] = np.array([0, 0, 0], dtype=np.dtype(np.uint8))
@@ -19,39 +20,47 @@ def test_flat_index(tmp_path):
update_vectors[3] = np.array([3, 3, 3], dtype=np.dtype(np.uint8))
update_vectors[4] = np.array([4, 4, 4], dtype=np.dtype(np.uint8))
index.update_batch(vectors=update_vectors, external_ids=np.array([0, 1, 2, 3, 4]))
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))

index.delete_batch(external_ids=np.array([1, 3]))
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))

update_vectors = np.empty([2], dtype=object)
update_vectors[0] = np.array([1, 1, 1], dtype=np.dtype(np.uint8))
update_vectors[1] = np.array([3, 3, 3], dtype=np.dtype(np.uint8))
index.update_batch(vectors=update_vectors, external_ids=np.array([1, 3]))
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {1, 2, 3}.issubset(set(result_i[0]))

index.delete_batch(external_ids=np.array([1, 3]))
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(np.array([[2, 2, 2]], dtype=np.float32), k=3)
assert {0, 2, 4}.issubset(set(result_i[0]))


def test_ivf_flat_index(tmp_path):
@@ -60,10 +69,11 @@ def test_ivf_flat_index(tmp_path):
index = ivf_flat_index.create(
uri=uri, dimensions=3, vector_type=np.dtype(np.uint8), partitions=partitions
)
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {ind.MAX_UINT64} == set(result_i[0])
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {ind.MAX_UINT64} == set(result_i[0])

update_vectors = np.empty([5], dtype=object)
update_vectors[0] = np.array([0, 0, 0], dtype=np.dtype(np.uint8))
@@ -72,52 +82,60 @@ def test_ivf_flat_index(tmp_path):
update_vectors[3] = np.array([3, 3, 3], dtype=np.dtype(np.uint8))
update_vectors[4] = np.array([4, 4, 4], dtype=np.dtype(np.uint8))
index.update_batch(vectors=update_vectors, external_ids=np.array([0, 1, 2, 3, 4]))
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))

index.delete_batch(external_ids=np.array([1, 3]))
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))

update_vectors = np.empty([2], dtype=object)
update_vectors[0] = np.array([1, 1, 1], dtype=np.dtype(np.uint8))
update_vectors[1] = np.array([3, 3, 3], dtype=np.dtype(np.uint8))
index.update_batch(vectors=update_vectors, external_ids=np.array([1, 3]))
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {1, 2, 3}.issubset(set(result_i[0]))

index.delete_batch(external_ids=np.array([1, 3]))
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))

index = index.consolidate_updates()
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))
for _ in range(2):
result_d, result_i = index.query(
np.array([[2, 2, 2]], dtype=np.float32), k=3, nprobe=partitions
)
assert {0, 2, 4}.issubset(set(result_i[0]))

0 comments on commit 211182e

Please sign in to comment.