Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Feb 4, 2025
2 parents f43b661 + de8f384 commit 3036c05
Show file tree
Hide file tree
Showing 52 changed files with 1,707 additions and 24 deletions.
36 changes: 36 additions & 0 deletions mteb/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1400,3 +1400,39 @@ def load_results(
year={2024}
}""",
)

# BEIR-NL benchmark definition: bundles the 15 "-NL" task names listed below
# into a single Benchmark entry. Per the description string, BEIR-NL is a Dutch
# adaptation of BEIR produced through automated translation.
BEIR_NL = Benchmark(
name="BEIR-NL",
# The task names are resolved via get_tasks (defined outside this view).
tasks=get_tasks(
tasks=[
"ArguAna-NL",
"CQADupstack-NL",
"FEVER-NL",
"NQ-NL",
"Touche2020-NL",
"FiQA2018-NL",
"Quora-NL",
"HotpotQA-NL",
"SCIDOCS-NL",
"ClimateFEVER-NL",
"mMARCO-NL",
"SciFact-NL",
"DBPedia-NL",
"NFCorpus-NL",
"TRECCOVID-NL",
],
),
# Adjacent string literals are concatenated into one description string.
description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated "
"translation.",
# The reference URL matches the `url` field of the BibTeX citation below.
reference="https://arxiv.org/abs/2412.08329",
contacts=["nikolay-banar"],
# BibTeX entry for the BEIR-NL paper; kept verbatim.
citation="""@misc{banar2024beirnlzeroshotinformationretrieval,
title={BEIR-NL: Zero-shot Information Retrieval Benchmark for the Dutch Language},
author={Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
year={2024},
eprint={2412.08329},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2412.08329},
}""",
)
15 changes: 15 additions & 0 deletions mteb/models/arctic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,15 @@
# splits not specified, so assuming everything
# in MTEB
"NQ": ["test"],
"NQ-NL": ["test"], # translated from NQ (not trained on)
"NQHardNegatives": ["test"],
"NQ-PL": ["test"],
"HotPotQA": ["test"], # translated, not trained on
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"HotpotQA-NL": ["test"], # translated from hotpotQA (not trained on)
"FEVER": ["test"],
"FEVER-NL": ["test"], # translated from FEVER (not trained on)
"FEVERHardNegatives": ["test"],
# not in MTEB
# trained on stack exchange (title-body)
Expand Down Expand Up @@ -160,11 +163,14 @@
# splits not specified, so assuming everything
# in MTEB
"NQ": ["test"],
"NQ-NL": ["test"], # translated from NQ (not trained on)
"NQHardNegatives": ["test"],
"HotPotQA": ["test"],
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"HotpotQA-NL": ["test"], # translated from hotpotQA (not trained on)
"FEVER": ["test"],
"FEVER-NL": ["test"], # translated from FEVER (not trained on)
"FEVERHardNegatives": ["test"],
# not in MTEB
# trained on stack exchange (title-body)
Expand Down Expand Up @@ -210,11 +216,14 @@
# splits not specified, so assuming everything
# in MTEB
"NQ": ["test"],
"NQ-NL": ["test"], # translated from NQ (not trained on)
"NQHardNegatives": ["test"],
"HotPotQA": ["test"],
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"HotpotQA-NL": ["test"], # translated from hotpotQA (not trained on)
"FEVER": ["test"],
"FEVER-NL": ["test"], # translated from FEVER (not trained on)
"FEVERHardNegatives": ["test"],
# not in MTEB
# trained on stack exchange (title-body)
Expand Down Expand Up @@ -260,11 +269,14 @@
# splits not specified, so assuming everything
# in MTEB
"NQ": ["test"],
"NQ-NL": ["test"], # translated from NQ (not trained on)
"NQHardNegatives": ["test"],
"HotPotQA": ["test"],
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"HotpotQA-NL": ["test"], # translated from hotpotQA (not trained on)
"FEVER": ["test"],
"FEVER-NL": ["test"], # translated from FEVER (not trained on)
"FEVERHardNegatives": ["test"],
# trained on stack exchange, unsure if sources match
# not in MTEB
Expand Down Expand Up @@ -310,11 +322,14 @@
# splits not specified, so assuming everything
# in MTEB
"NQ": ["test"],
"NQ-NL": ["test"], # translated from NQ (not trained on)
"NQHardNegatives": ["test"],
"HotPotQA": ["test"],
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"HotpotQA-NL": ["test"], # translated from hotpotQA (not trained on)
"FEVER": ["test"],
"FEVER-NL": ["test"], # translated from FEVER (not trained on)
"FEVERHardNegatives": ["test"],
# not in MTEB
# trained on stack exchange (title-body)
Expand Down
12 changes: 10 additions & 2 deletions mteb/models/bge_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@
"MSMARCOHardNegatives": ["train"],
"NanoMSMARCORetrieval": ["train"],
"MSMARCO-PL": ["train"], # translation not trained on
"mMARCO-NL": ["train"], # translation not trained on
"NQ": ["train"],
"NQ-NL": ["train"], # translation not trained on
"NQHardNegatives": ["train"],
"NanoNQRetrieval": ["train"],
"NQ-PL": ["train"], # translation not trained on
"HotpotQA": ["train"],
"HotpotQA-PL": ["train"], # translation not trained on
"HotpotQA-NL": ["train"], # translation not trained on
"HotpotQAHardNegatives": ["train"],
"T2Retrieval": ["train"],
"DuReader": ["train"],
Expand Down Expand Up @@ -58,6 +61,7 @@
bge_training_data = {
# source: https://data.baai.ac.cn/details/BAAI-MTP
"NQ": ["test"],
"NQ-NL": ["test"], # translation not trained on
"NQHardNegatives": ["test"],
"AmazonReviewsClassification": [
"validation",
Expand Down Expand Up @@ -309,7 +313,6 @@
"zho_Hans", # zh
]


bge_small_en_v1_5 = ModelMeta(
loader=partial( # type: ignore
sentence_transformers_loader,
Expand Down Expand Up @@ -641,7 +644,6 @@
training_datasets=bge_m3_training_data,
)


bge_multilingual_gemma2 = ModelMeta(
loader=partial( # type: ignore
sentence_transformers_loader,
Expand Down Expand Up @@ -686,11 +688,17 @@
# TriviaQA
# QuoraDuplicateQuestions
"HotpotQA": ["train"],
"HotpotQA-NL": ["train"], # translation not trained on
"FEVER": ["train"],
"FEVER-NL": ["train"], # translation not trained on
"MSMARCO": ["train"],
"mMARCO-NL": ["train"], # translation not trained on
"NQ": ["train"],
"NQ-NL": ["train"], # translation not trained on
"ArguAna": ["train"],
"ArguAna-NL": ["train"], # translation not trained on
"FiQA2018": ["train"],
"FiQA2018-NL": ["train"], # translation not trained on
# |Reranking|
"SciDocsReranking": ["train"],
"StackOverflowDupQuestions": ["train"],
Expand Down
3 changes: 2 additions & 1 deletion mteb/models/colbert_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
superseded_by=None,
training_datasets={
"MSMARCO": ["train"], # dev?
"mMARCO-NL": ["train"], # translation not trained on
},
)


jina_colbert_v2 = ModelMeta(
loader=partial(
ColBERTWrapper,
Expand Down Expand Up @@ -222,6 +222,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
superseded_by=None,
training_datasets={
"MSMARCO": ["train"],
"mMARCO-NL": ["train"], # translation not trained on
"DuRetrieval": [],
"MIRACL": ["train"],
},
Expand Down
6 changes: 4 additions & 2 deletions mteb/models/e5_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@

MISTRAL_LANGUAGES = ["eng_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "spa_Latn"]


E5_INSTRUCTION = "Instruct: {instruction}\nQuery: "


# Training-data annotations: every entry of E5_TRAINING_DATA, followed by the
# FEVER and HotpotQA families, each mapped to its own ["train"] split list.
E5_MISTRAL_TRAINING_DATA = {
    **E5_TRAINING_DATA,
    **{
        # A fresh ["train"] list is built per task, so entries never share
        # a list object.
        task_name: ["train"]
        for task_name in (
            "FEVER",
            "FEVERHardNegatives",
            "FEVER-NL",  # translation not trained on
            "HotpotQA",
            "HotpotQAHardNegatives",
            "HotpotQA-PL",  # translation not trained on
            "HotpotQA-NL",  # translation not trained on
        )
    },
}

e5_instruct = ModelMeta(
Expand Down Expand Up @@ -116,8 +116,10 @@
# copied from e5
# source: https://arxiv.org/pdf/2212.03533
"NQ": ["test"],
"NQ-NL": ["test"], # translation not trained on
"NQHardNegatives": ["test"],
"MSMARCO": ["train"], # dev?
"mMARCO-NL": ["train"], # translation not trained on
# source: https://www.zeta-alpha.com/post/fine-tuning-an-llm-for-state-of-the-art-retrieval-zeta-alpha-s-top-10-submission-to-the-the-mteb-be
# "Arguana",
# "FEVER",
Expand Down
4 changes: 4 additions & 0 deletions mteb/models/e5_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,23 @@
"MSMARCOHardNegatives": ["train"],
"NanoMSMARCORetrieval": ["train"],
"MSMARCO-PL": ["train"], # translation not trained on
"mMARCO-NL": ["train"], # translation not trained on
"NQ": ["train"],
"NQHardNegatives": ["train"],
"NanoNQRetrieval": ["train"],
"NQ-PL": ["train"], # translation not trained on
"NQ-NL": ["train"], # translation not trained on
}

# Training-data annotations: the E5_TRAINING_DATA entries extended with the
# FEVER and HotpotQA families, all annotated with the "train" split.
ME5_TRAINING_DATA = {
    **E5_TRAINING_DATA,
    **{
        # Each task gets its own newly created ["train"] list.
        dataset: ["train"]
        for dataset in (
            "FEVER",
            "FEVERHardNegatives",
            "FEVER-NL",  # translation not trained on
            "HotpotQA",
            "HotpotQAHardNegatives",
            "HotpotQA-PL",  # translation not trained on
            "HotpotQA-NL",  # translation not trained on
        )
    },
}

e5_mult_small = ModelMeta(
Expand Down
3 changes: 2 additions & 1 deletion mteb/models/gme_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

logger = logging.getLogger(__name__)


gme_qwen2_vl_2b_instruct = ModelMeta(
loader=None,
name="Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
Expand All @@ -29,6 +28,7 @@
# source: https://arxiv.org/pdf/2412.16855
"MSMARCO": ["train"],
"MSMARCO.v2": ["train"],
"mMARCO-NL": ["train"], # translation not trained on
},
public_training_code=None,
public_training_data=None,
Expand Down Expand Up @@ -56,6 +56,7 @@
# source: https://arxiv.org/pdf/2412.16855
"MSMARCO": ["train"],
"MSMARCO.v2": ["train"],
"mMARCO-NL": ["train"], # translation not trained on
},
public_training_code=None,
public_training_data=None,
Expand Down
3 changes: 2 additions & 1 deletion mteb/models/gritlm_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@

logger = logging.getLogger(__name__)


# Training-data annotations: the E5_TRAINING_DATA entries plus the FEVER and
# HotpotQA families, each annotated with the "train" split.
GRIT_LM_TRAINING_DATA = {
    # source https://arxiv.org/pdf/2402.09906
    **E5_TRAINING_DATA,
    # also uses medi2 which contains fever and hotpotqa:
    **{
        # One new ["train"] list per task; no list is shared between keys.
        medi2_task: ["train"]
        for medi2_task in (
            "FEVER",
            "FEVERHardNegatives",
            "FEVER-NL",  # translation not trained on
            "HotpotQA",
            "HotpotQAHardNegatives",
            "HotpotQA-PL",  # translation not trained on
            "HotpotQA-NL",  # translation not trained on
        )
    },
}


Expand Down
6 changes: 4 additions & 2 deletions mteb/models/gte_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def instruction_template(
max_tokens=131072,
)


gte_Qwen1_5_7B_instruct = ModelMeta(
loader=partial( # type: ignore
instruct_wrapper,
Expand Down Expand Up @@ -82,7 +81,6 @@ def instruction_template(
training_datasets=None,
)


gte_Qwen2_1_5B_instruct = ModelMeta(
loader=partial( # type: ignore
instruct_wrapper,
Expand Down Expand Up @@ -264,10 +262,14 @@ def instruction_template(
"DuReader": ["train"],
"MMarcoReranking": ["train"],
"CMedQAv2-reranking": ["train"],
"NQ-NL": ["train"], # translation not trained on
"NQ": ["train"],
"MSMARCO": ["train"],
"mMARCO-NL": ["train"], # translation not trained on
"HotpotQA": ["train"],
"HotpotQA-NL": ["train"], # translation not trained on
"FEVER": ["train"],
"FEVER-NL": ["train"], # translation not trained on
"MIRACLReranking": ["train"],
"MrTidyRetrieval": ["train"],
"MultiLongDocRetrieval": ["train"],
Expand Down
4 changes: 4 additions & 0 deletions mteb/models/ibm_granite_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,18 @@
# Machine Translations of SPECTER citation triplets
# Natural Questions (NQ)
"NQ": ["test"],
"NQ-NL": ["test"], # translation not trained on
"NQHardNegatives": ["test"],
# SQuAD2.0
# HotpotQA
"HotPotQA": ["test"],
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"HotpotQA-NL": ["test"], # translated from hotpotQA (not trained on)
# Fever
"FEVER": ["test"],
"FEVERHardNegatives": ["test"],
"FEVER-NL": ["test"], # translated from FEVER (not trained on)
# PubMed
# Multilingual Miracl Triples
"MIRACLRetrieval": ["train"],
Expand All @@ -72,6 +75,7 @@
# Sadeeem Question Answering
# DBPedia Title-Body Pairs
"DBPedia": ["train"],
"DBPedia-NL": ["train"], # translated from DBPedia (not trained on)
# Synthetic: English Query-Wikipedia Passage
# Synthetic: English Fact Verification
# Synthetic: Multilingual Query-Wikipedia Passage
Expand Down
3 changes: 2 additions & 1 deletion mteb/models/jina_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,16 +234,17 @@ def encode(
"MSMARCO": ["train"],
"MSMARCOHardNegatives": ["train"],
"NanoMSMARCORetrieval": ["train"],
"mMARCO-NL": ["train"], # translation not trained on
"NQ": ["train"],
"NQHardNegatives": ["train"],
"NanoNQRetrieval": ["train"],
"NQ-PL": ["train"], # translation not trained on
"NQ-NL": ["train"], # translation not trained on
# oasst1, oasst2
},
adapted_from="XLM-RoBERTa",
)


jina_embeddings_v2_base_en = ModelMeta(
loader=partial(
SentenceTransformerWrapper,
Expand Down
Loading

0 comments on commit 3036c05

Please sign in to comment.