3
3
from elasticsearch import Elasticsearch , helpers
4
4
import json
5
5
import time
6
- from typing import List , Optional
6
+ from typing import List
7
7
import os
8
8
import h5py
9
9
import uuid
10
10
import numpy as np
11
- import csv
11
+ import logging
12
12
13
13
from .base_client import BaseClient
14
14
@@ -74,7 +74,7 @@ def upload(self):
74
74
for i , line in enumerate (data_file ):
75
75
row = line .strip ().split ('\t ' )
76
76
if len (row ) != len (headers ):
77
- print (f"row = { i } , row_len = { len (row )} , not equal headers len, skip" )
77
+ logging . info (f"row = { i } , row_len = { len (row )} , not equal headers len, skip" )
78
78
continue
79
79
row_dict = {header : value for header , value in zip (headers , row )}
80
80
current_batch .append ({"_index" : self .collection_name , "_id" : uuid .UUID (int = i ).hex , "_source" : row_dict })
@@ -133,7 +133,7 @@ def search(self) -> list[list[Any]]:
133
133
The function returns id list.
134
134
"""
135
135
query_path = os .path .join (self .path_prefix , self .data ["query_path" ])
136
- print (query_path )
136
+ logging . info (query_path )
137
137
results = []
138
138
_ , ext = os .path .splitext (query_path )
139
139
if ext == '.json' or ext == '.jsonl' :
@@ -184,7 +184,7 @@ def search(self) -> list[list[Any]]:
184
184
latency = (end - start ) * 1000
185
185
result = [(uuid .UUID (hex = hit ['_id' ]).int , hit ['_score' ]) for hit in result ['hits' ]['hits' ]]
186
186
result .append (latency )
187
- print (f"{ line [:- 1 ]} , { latency } " )
187
+ logging . info (f"{ line [:- 1 ]} , { latency } " )
188
188
results .append (result )
189
189
else :
190
190
raise TypeError ("Unsupported file type" )
@@ -214,7 +214,7 @@ def check_and_save_results(self, results: List[List[Any]]):
214
214
precisions .append (precision )
215
215
latencies .append (result [- 1 ])
216
216
217
- print (
217
+ logging . info (
218
218
f'''mean_time: { np .mean (latencies )} , mean_precisions: { np .mean (precisions )} ,
219
219
std_time: { np .std (latencies )} , min_time: { np .min (latencies )} , \n
220
220
max_time: { np .max (latencies )} , p95_time: { np .percentile (latencies , 95 )} ,
@@ -223,7 +223,7 @@ def check_and_save_results(self, results: List[List[Any]]):
223
223
latencies = []
224
224
for result in results :
225
225
latencies .append (result [- 1 ])
226
- print (
226
+ logging . info (
227
227
f'''mean_time: { np .mean (latencies )} , std_time: { np .std (latencies )} ,
228
228
max_time: { np .max (latencies )} , min_time: { np .min (latencies )} ,
229
229
p95_time: { np .percentile (latencies , 95 )} , p99_time: { np .percentile (latencies , 99 )} ''' )
0 commit comments