Fill in previously-None task metadata fields for two tasks:
TenKGnadClusteringP2P (form, domains, task_subtypes, license,
socioeconomic_status, dialect, text_creation) and SyntecRetrieval (form,
domains, task_subtypes, license, socioeconomic_status, annotations_creators,
text_creation; date stays None with a "not specified" comment).

NOTE(review): the leading indentation of the hunk lines was not recoverable
from the source of this patch; it is reconstructed here assuming 8-space
keyword-argument indentation. Confirm against the target files before applying.
NOTE(review): the added lines mix single- and double-quoted string literals,
and the SyntecRetrieval license value "not specified." ends with a period.
Both are left exactly as submitted; confirm whether they are intended.

diff --git a/mteb/tasks/Clustering/deu/TenKGnadClusteringP2P.py b/mteb/tasks/Clustering/deu/TenKGnadClusteringP2P.py
index c63e72d3af..36cb14e742 100644
--- a/mteb/tasks/Clustering/deu/TenKGnadClusteringP2P.py
+++ b/mteb/tasks/Clustering/deu/TenKGnadClusteringP2P.py
@@ -22,14 +22,14 @@ class TenKGnadClusteringP2P(AbsTaskClustering):
         eval_langs=["deu-Latn"],
         main_score="v_measure",
         date=None,
-        form=None,
-        domains=None,
-        task_subtypes=None,
-        license=None,
-        socioeconomic_status=None,
+        form=['written'],
+        domains=['Web'],
+        task_subtypes=[],
+        license="cc-by-nc-sa-4.0",
+        socioeconomic_status="mixed",
         annotations_creators=None,
-        dialect=None,
-        text_creation=None,
+        dialect=[],
+        text_creation="found",
         bibtex_citation=None,
         n_samples={"test": 45914},
         avg_character_length={"test": 2641.03},
diff --git a/mteb/tasks/Retrieval/fra/SyntecRetrieval.py b/mteb/tasks/Retrieval/fra/SyntecRetrieval.py
index 710d7a9268..0a9c84299c 100644
--- a/mteb/tasks/Retrieval/fra/SyntecRetrieval.py
+++ b/mteb/tasks/Retrieval/fra/SyntecRetrieval.py
@@ -23,15 +23,15 @@ class SyntecRetrieval(AbsTaskRetrieval):
         eval_splits=_EVAL_SPLITS,
         eval_langs=["fra-Latn"],
         main_score="ndcg_at_10",
-        date=None,
-        form=None,
-        domains=None,
-        task_subtypes=None,
-        license=None,
-        socioeconomic_status=None,
-        annotations_creators=None,
+        date=None, # not specified
+        form=['written'],
+        domains=["Legal"],
+        task_subtypes=[],
+        license="not specified.",
+        socioeconomic_status='high',
+        annotations_creators="human-annotated",
         dialect=[],
-        text_creation=None,
+        text_creation="created",
         bibtex_citation="""@misc{ciancone2024extending,
         title={Extending the Massive Text Embedding Benchmark to French},
         author={Mathieu Ciancone and Imene Kerboua and Marion Schaeffer and Wissam Siblini},