fix: Add error reporting for Retrieval #873

Merged · 10 commits · Jun 5, 2024
41 changes: 38 additions & 3 deletions mteb/abstasks/AbsTaskRetrieval.py
@@ -285,10 +285,14 @@ def _evaluate_subset(
"Time taken to retrieve: {:.2f} seconds".format(end_time - start_time)
)

if kwargs.get("save_predictions", False):
save_predictions = kwargs.get("save_predictions", False)
export_errors = kwargs.get("export_errors", False)
if save_predictions or export_errors:
output_folder = Path(kwargs.get("output_folder", "results"))
if not os.path.isdir(output_folder):
os.makedirs(output_folder)

if save_predictions:
top_k = kwargs.get("top_k", None)
if top_k is not None:
for qid in list(results.keys()):
@@ -301,8 +305,7 @@ def _evaluate_subset(
                         k: v for k, v in results[qid].items() if k in doc_ids
                     }
             qrels_save_path = (
-                output_folder
-                / f"{self.metadata_dict['name']}_{hf_subset}_predictions.json"
+                output_folder / f"{self.metadata.name}_{hf_subset}_predictions.json"
             )
 
             with open(qrels_save_path, "w") as f:
@@ -333,6 +336,38 @@ def _evaluate_subset(
             },
         }
         self._add_main_score(scores)
 
+        if export_errors:
+            errors = {}
+
+            top_k = kwargs.get("top_k", 1)
+            if not save_predictions and top_k == 1:
+                for qid in results.keys():
+                    doc_scores = results[qid]
+                    sorted_docs = sorted(
+                        doc_scores.items(), key=lambda x: x[1], reverse=True
+                    )[:top_k]
+                    results[qid] = {doc_id: score for doc_id, score in sorted_docs}
+            for qid, retrieved_docs in results.items():
+                expected_docs = relevant_docs[qid]
+                false_positives = [
+                    doc for doc in retrieved_docs if doc not in expected_docs
+                ]
+                false_negatives = [
+                    doc for doc in expected_docs if doc not in retrieved_docs
+                ]
+                if false_positives or false_negatives:
+                    errors[qid] = {
+                        "false_positives": false_positives,
+                        "false_negatives": false_negatives,
+                    }
+
+            errors_save_path = (
+                output_folder / f"{self.metadata.name}_{hf_subset}_errors.json"
+            )
+            with open(errors_save_path, "w") as f:
+                json.dump(errors, f)
+
         return scores
 
     def _add_main_score(self, scores: ScoresDict) -> None:
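For context, a minimal sketch of how the new flags might be enabled from the benchmark runner, assuming that keyword arguments passed to `MTEB.run` are forwarded down to `_evaluate_subset`; the model and task below are arbitrary placeholders, not part of this PR:

```python
from mteb import MTEB
from sentence_transformers import SentenceTransformer

# Arbitrary encoder and retrieval task, chosen only for illustration.
model = SentenceTransformer("all-MiniLM-L6-v2")
evaluation = MTEB(tasks=["SciFact"])

evaluation.run(
    model,
    output_folder="results",  # predictions/errors JSON files are written here
    save_predictions=True,    # write "<task>_<subset>_predictions.json" with top-k results
    export_errors=True,       # write "<task>_<subset>_errors.json" with FP/FN per query
    top_k=10,                 # keep only the 10 highest-scoring documents per query
)
```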
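The exported errors file maps each query id to the retrieved documents that are not in the qrels (false positives) and the relevant documents that were not retrieved (false negatives). A small sketch for inspecting it; the file name follows the `{task_name}_{hf_subset}_errors.json` pattern from the diff, and the concrete path below is a hypothetical example:

```python
import json
from pathlib import Path

# Hypothetical example path; the real name depends on the task and hf_subset.
errors_path = Path("results") / "SciFact_default_errors.json"

with open(errors_path) as f:
    errors = json.load(f)

# Each value has the shape:
# {"false_positives": [doc_id, ...], "false_negatives": [doc_id, ...]}
for qid, err in errors.items():
    print(qid, "missed:", err["false_negatives"], "spurious:", err["false_positives"])
```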