From df68f8c7aadbee9b09da4193b9d6badc3990a97b Mon Sep 17 00:00:00 2001
From: Genta Indra Winata
Date: Sat, 15 Jun 2024 16:50:51 -0400
Subject: [PATCH 1/9] fix: Add NollySenti Bitext Mining (MINERS) (#915)

* add NollySentiBitextMining

* update reference

* update avg char

* add points

* add results
---
 docs/mmteb/points/915.jsonl                   |  2 +
 mteb/tasks/BitextMining/__init__.py           |  1 +
 .../multilingual/NollySentiBitextMining.py    | 49 +++++++++++++++
 .../NollySentiBitextMining.json               | 59 +++++++++++++++++++
 .../NollySentiBitextMining.json               | 59 +++++++++++++++++++
 5 files changed, 170 insertions(+)
 create mode 100644 docs/mmteb/points/915.jsonl
 create mode 100644 mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py
 create mode 100644 results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NollySentiBitextMining.json
 create mode 100644 results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NollySentiBitextMining.json

diff --git a/docs/mmteb/points/915.jsonl b/docs/mmteb/points/915.jsonl
new file mode 100644
index 0000000000..64ea007b58
--- /dev/null
+++ b/docs/mmteb/points/915.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "gentaiscool", "New dataset": 18}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
\ No newline at end of file
diff --git a/mteb/tasks/BitextMining/__init__.py b/mteb/tasks/BitextMining/__init__.py
index 56791c0e33..709d19f158 100644
--- a/mteb/tasks/BitextMining/__init__.py
+++ b/mteb/tasks/BitextMining/__init__.py
@@ -11,6 +11,7 @@
 from .multilingual.IN22GenBitextMining import *
 from .multilingual.IndicGenBenchFloresBitextMining import *
 from .multilingual.IWSLT2017BitextMinig import *
+from .multilingual.NollySentiBitextMining import *
 from .multilingual.NorwegianCourtsBitextMining import *
 from .multilingual.NTREXBitextMining import *
 from .multilingual.NusaTranslationBitextMining import *
diff --git a/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py b/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py
new file mode 100644
index 0000000000..452e38bc71
--- /dev/null
+++ b/mteb/tasks/BitextMining/multilingual/NollySentiBitextMining.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+from ....abstasks import AbsTaskBitextMining, CrosslingualTask
+
+_LANGUAGES = {
+    "en-ha": ["eng-Latn", "hau-Latn"],
+    "en-ig": ["eng-Latn", "ibo-Latn"],
+    "en-pcm": ["eng-Latn", "pcm-Latn"],
+    "en-yo": ["eng-Latn", "yor-Latn"],
+}
+
+
+class NollySentiBitextMining(AbsTaskBitextMining, CrosslingualTask):
+    metadata = TaskMetadata(
+        name="NollySentiBitextMining",
+        dataset={
+            "path": "gentaiscool/bitext_nollysenti_miners",
+            "revision": "d48254fbdb51af1ae7f20831aab0bccf0b70a19c",
+        },
+        description="NollySenti is a dataset of Nollywood movie reviews in five languages widely spoken in Nigeria (English, Hausa, Igbo, Nigerian-Pidgin, and Yoruba).",
+        reference="https://github.com/IyanuSh/NollySenti",
+        type="BitextMining",
+        category="s2s",
+        eval_splits=["train"],
+        eval_langs=_LANGUAGES,
+        main_score="f1",
+        date=("2022-01-01", "2023-01-01"),
+        form=["written"],
+        domains=["Social", "Reviews"],
+        task_subtypes=[],
+        license="CC BY-SA 4.0",
+        socioeconomic_status="mixed",
+        annotations_creators="human-annotated",
+        dialect=[],
+        text_creation="found",
+        bibtex_citation="""
+        @inproceedings{shode2023nollysenti,
+            title={NollySenti: Leveraging Transfer Learning and Machine Translation for Nigerian Movie Sentiment Classification},
+            author={Shode, Iyanuoluwa 
and Adelani, David Ifeoluwa and Peng, Jing and Feldman, Anna}, + booktitle={Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)}, + pages={986--998}, + year={2023} + } + """, + n_samples={"train": 1640}, + avg_character_length={"train": 135.91}, + ) diff --git a/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NollySentiBitextMining.json b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NollySentiBitextMining.json new file mode 100644 index 0000000000..1cd2201ec7 --- /dev/null +++ b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NollySentiBitextMining.json @@ -0,0 +1,59 @@ +{ + "dataset_revision": "d48254fbdb51af1ae7f20831aab0bccf0b70a19c", + "evaluation_time": 2.4624860286712646, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "train": [ + { + "accuracy": 0.6024390243902439, + "f1": 0.5550871080139372, + "hf_subset": "en-ha", + "languages": [ + "eng-Latn", + "hau-Latn" + ], + "main_score": 0.5550871080139372, + "precision": 0.5376023150062009, + "recall": 0.6024390243902439 + }, + { + "accuracy": 0.48048780487804876, + "f1": 0.41505282607430916, + "hf_subset": "en-ig", + "languages": [ + "eng-Latn", + "ibo-Latn" + ], + "main_score": 0.41505282607430916, + "precision": 0.3965549122087859, + "recall": 0.48048780487804876 + }, + { + "accuracy": 0.9341463414634147, + "f1": 0.9147967479674797, + "hf_subset": "en-pcm", + "languages": [ + "eng-Latn", + "pcm-Latn" + ], + "main_score": 0.9147967479674797, + "precision": 0.9058943089430895, + "recall": 0.9341463414634147 + }, + { + "accuracy": 0.37317073170731707, + "f1": 0.31159316293462636, + "hf_subset": "en-yo", + "languages": [ + "eng-Latn", + "yor-Latn" + ], + "main_score": 0.31159316293462636, + "precision": 0.29500760777451523, + "recall": 0.37317073170731707 + } + ] + }, + "task_name": "NollySentiBitextMining" +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NollySentiBitextMining.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NollySentiBitextMining.json new file mode 100644 index 0000000000..73df27c0e0 --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NollySentiBitextMining.json @@ -0,0 +1,59 @@ +{ + "dataset_revision": "d48254fbdb51af1ae7f20831aab0bccf0b70a19c", + "evaluation_time": 2.15173602104187, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "train": [ + { + "accuracy": 0.0951219512195122, + "f1": 0.06774883543766314, + "hf_subset": "en-ha", + "languages": [ + "eng-Latn", + "hau-Latn" + ], + "main_score": 0.06774883543766314, + "precision": 0.0610613836833349, + "recall": 0.0951219512195122 + }, + { + "accuracy": 0.12195121951219512, + "f1": 0.08541203545776717, + "hf_subset": "en-ig", + "languages": [ + "eng-Latn", + "ibo-Latn" + ], + "main_score": 0.08541203545776717, + "precision": 0.07849906629318394, + "recall": 0.12195121951219512 + }, + { + "accuracy": 0.7682926829268293, + "f1": 0.7218855805441171, + "hf_subset": "en-pcm", + "languages": [ + "eng-Latn", + "pcm-Latn" + ], + "main_score": 0.7218855805441171, + "precision": 0.7047368282124379, + "recall": 0.7682926829268293 + }, + { + "accuracy": 0.07560975609756097, + "f1": 0.05108171503293455, + "hf_subset": "en-yo", + 
"languages": [ + "eng-Latn", + "yor-Latn" + ], + "main_score": 0.05108171503293455, + "precision": 0.046867132867132864, + "recall": 0.07560975609756097 + } + ] + }, + "task_name": "NollySentiBitextMining" +} \ No newline at end of file From 2a31c8c6f1e21fe5f91e794a313f1cd3083beff3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 15 Jun 2024 20:51:26 +0000 Subject: [PATCH 2/9] Update points table --- docs/mmteb/points_table.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/mmteb/points_table.md b/docs/mmteb/points_table.md index 14aaff9ec9..29f9d700db 100644 --- a/docs/mmteb/points_table.md +++ b/docs/mmteb/points_table.md @@ -4,7 +4,7 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended | GitHub | New dataset | Review PR | Coordination | Bug fixes | Dataset annotations | Running Models | Paper writing | New task | Total | |:------------------|--------------:|------------:|---------------:|------------:|----------------------:|-----------------:|----------------:|-----------:|--------:| -| KennethEnevoldsen | 68 | 256 | 11 | 81 | 35 | 0 | 0 | 0 | 451 | +| KennethEnevoldsen | 68 | 258 | 11 | 81 | 35 | 0 | 0 | 0 | 453 | | isaac-chung | 116 | 182 | 4 | 40 | 1 | 0 | 4 | 0 | 347 | | awinml | 292 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 294 | | imenelydiaker | 120 | 140 | 0 | 20 | 0 | 0 | 0 | 0 | 280 | @@ -18,8 +18,8 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended | MathieuCiancone | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | | schmarion | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | | GabrielSequeira | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | +| gentaiscool | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 82 | | digantamisra98 | 71 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 71 | -| gentaiscool | 64 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 64 | | shreeya-dhakal | 54 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 62 | | Rysias | 58 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 58 | | asparius | 34 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 48 | From e13f0371341dd08b7868f5e049478dae891a27a5 Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Sat, 15 Jun 2024 16:51:49 -0400 Subject: [PATCH 3/9] fix: Add NusaParagraph Topic Classification (#927) * add data * update lang code * update metadata * update metadata * update metadata * update files * Update mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py Co-authored-by: Kenneth Enevoldsen * update avg char * add points * add results and fix lint --------- Co-authored-by: Kenneth Enevoldsen --- docs/mmteb/points/927.jsonl | 2 + mteb/tasks/Classification/__init__.py | 1 + .../NusaParagraphTopicClassification.py | 60 ++ .../NusaParagraphTopicClassification.json | 631 ++++++++++++++++++ .../NusaParagraphTopicClassification.json | 631 ++++++++++++++++++ 5 files changed, 1325 insertions(+) create mode 100644 docs/mmteb/points/927.jsonl create mode 100644 mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py create mode 100644 results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphTopicClassification.json create mode 100644 results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphTopicClassification.json diff --git a/docs/mmteb/points/927.jsonl b/docs/mmteb/points/927.jsonl new file mode 100644 index 0000000000..64ea007b58 --- /dev/null +++ b/docs/mmteb/points/927.jsonl @@ -0,0 +1,2 @@ +{"GitHub": "gentaiscool", "New dataset": 18} +{"GitHub": "KennethEnevoldsen", "Review PR": 2} \ No newline at end of file 
diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py index 89826de5cb..9559feeb80 100644 --- a/mteb/tasks/Classification/__init__.py +++ b/mteb/tasks/Classification/__init__.py @@ -86,6 +86,7 @@ from .multilingual.MultilingualSentimentClassification import * from .multilingual.NaijaSenti import * from .multilingual.NordicLangClassification import * +from .multilingual.NusaParagraphTopicClassification import * from .multilingual.NusaXSenti import * from .multilingual.ScalaClassification import * from .multilingual.SIB200Classification import * diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py new file mode 100644 index 0000000000..c7d4900de3 --- /dev/null +++ b/mteb/tasks/Classification/multilingual/NusaParagraphTopicClassification.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks import AbsTaskClassification, MultilingualTask + +_LANGUAGES = { + "btk": ["bbc-Latn"], + "bew": ["bew-Latn"], + "bug": ["bug-Latn"], + "jav": ["jav-Latn"], + "mad": ["mad-Latn"], + "mak": ["mak-Latn"], + "min": ["min-Latn"], + "mui": ["mui-Latn"], + "rej": ["rej-Latn"], + "sun": ["sun-Latn"], +} + + +class NusaParagraphTopicClassification(MultilingualTask, AbsTaskClassification): + metadata = TaskMetadata( + name="NusaParagraphTopicClassification", + dataset={ + "path": "gentaiscool/nusaparagraph_topic", + "revision": "abb43f8d5b9510b8724b48283aca26c4733eac5d", + }, + description="NusaParagraphTopicClassification is a multi-class topic classification on 10 Indonesian languages.", + reference="https://github.com/IndoNLP/nusa-writes", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=_LANGUAGES, + main_score="f1", + date=("2021-08-01", "2022-07-01"), + form=["written"], + domains=["Non-fiction", "Fiction"], + task_subtypes=["Topic classification"], + license="Apache 2.0", + socioeconomic_status="mixed", + annotations_creators="human-annotated", + dialect=[], + text_creation="found", + bibtex_citation=""" + @inproceedings{cahyawijaya-etal-2023-nusawrites, + title = "NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages", + author = "Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale", + editor = "Park, Jong C. 
and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa", + booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)", + month = nov, + year = "2023", + address = "Nusa Dua, Bali", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2023.ijcnlp-main.60", + pages = "921--945", + } + """, + n_samples={"train": 15516, "validation": 2948, "test": 6250}, + avg_character_length={"train": 740.24, "validation": 740.66, "test": 740.71}, + ) diff --git a/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphTopicClassification.json b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphTopicClassification.json new file mode 100644 index 0000000000..40e6be2592 --- /dev/null +++ b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphTopicClassification.json @@ -0,0 +1,631 @@ +{ + "dataset_revision": "abb43f8d5b9510b8724b48283aca26c4733eac5d", + "evaluation_time": 31.910111904144287, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "test": [ + { + "accuracy": 0.6334, + "f1": 0.6171183470608116, + "f1_weighted": 0.629464721103177, + "hf_subset": "btk", + "languages": [ + "bbc-Latn" + ], + "main_score": 0.6171183470608116, + "scores_per_experiment": [ + { + "accuracy": 0.59, + "f1": 0.5852878404051078, + "f1_weighted": 0.5958927453217002 + }, + { + "accuracy": 0.62, + "f1": 0.600587569376949, + "f1_weighted": 0.6015903595912627 + }, + { + "accuracy": 0.696, + "f1": 0.6760889882154619, + "f1_weighted": 0.6924571272692137 + }, + { + "accuracy": 0.64, + "f1": 0.6173446272666636, + "f1_weighted": 0.6327655573975481 + }, + { + "accuracy": 0.624, + "f1": 0.5986244170348578, + "f1_weighted": 0.6074905539687783 + }, + { + "accuracy": 0.658, + "f1": 0.6358373683853095, + "f1_weighted": 0.6509800878050068 + }, + { + "accuracy": 0.572, + "f1": 0.5690495790315273, + "f1_weighted": 0.581213494781202 + }, + { + "accuracy": 0.65, + "f1": 0.6304842998267575, + "f1_weighted": 0.6496684375932065 + }, + { + "accuracy": 0.644, + "f1": 0.6299587015672454, + "f1_weighted": 0.6502681630348887 + }, + { + "accuracy": 0.64, + "f1": 0.627920079498236, + "f1_weighted": 0.6323206842689628 + } + ] + }, + { + "accuracy": 0.68675, + "f1": 0.6678745690209558, + "f1_weighted": 0.6878928377540281, + "hf_subset": "bew", + "languages": [ + "bew-Latn" + ], + "main_score": 0.6678745690209558, + "scores_per_experiment": [ + { + "accuracy": 0.64625, + "f1": 0.6322727297666426, + "f1_weighted": 0.6592130253059881 + }, + { + "accuracy": 0.69125, + "f1": 0.663240860460134, + "f1_weighted": 0.6932953236033225 + }, + { + "accuracy": 0.66125, + "f1": 0.6531668710206416, + "f1_weighted": 0.6754609390663449 + }, + { + "accuracy": 0.695, + "f1": 0.6758684252299783, + "f1_weighted": 0.6964981076543918 + }, + { + "accuracy": 0.69, + "f1": 0.651473632085287, + "f1_weighted": 0.6754998687407036 + }, + { + "accuracy": 0.73125, + "f1": 0.7094399385343673, + "f1_weighted": 0.7314875092958686 + }, + { + "accuracy": 0.72875, + "f1": 0.7150086094859092, + "f1_weighted": 0.7296549282637496 + }, + { + "accuracy": 0.655, + "f1": 0.6451372316903234, + "f1_weighted": 0.6600691070006784 + }, + { + "accuracy": 0.65125, + "f1": 0.6358222537320755, + "f1_weighted": 
0.6402935294276272 + }, + { + "accuracy": 0.7175, + "f1": 0.6973151382041993, + "f1_weighted": 0.7174560391816064 + } + ] + }, + { + "accuracy": 0.6503333333333334, + "f1": 0.44533490659642155, + "f1_weighted": 0.5778813318278211, + "hf_subset": "bug", + "languages": [ + "bug-Latn" + ], + "main_score": 0.44533490659642155, + "scores_per_experiment": [ + { + "accuracy": 0.61, + "f1": 0.4114791266678888, + "f1_weighted": 0.5414099616118933 + }, + { + "accuracy": 0.6433333333333333, + "f1": 0.4392030792840382, + "f1_weighted": 0.5696605604687246 + }, + { + "accuracy": 0.6766666666666666, + "f1": 0.4697495627302843, + "f1_weighted": 0.6069091885768013 + }, + { + "accuracy": 0.6466666666666666, + "f1": 0.44455962019758105, + "f1_weighted": 0.5736951562727561 + }, + { + "accuracy": 0.64, + "f1": 0.44168181982282356, + "f1_weighted": 0.5717484945947696 + }, + { + "accuracy": 0.66, + "f1": 0.4545023388773389, + "f1_weighted": 0.5880218872718873 + }, + { + "accuracy": 0.6466666666666666, + "f1": 0.4338039458102571, + "f1_weighted": 0.566220688043727 + }, + { + "accuracy": 0.6733333333333333, + "f1": 0.4684968853082196, + "f1_weighted": 0.6027469971019468 + }, + { + "accuracy": 0.65, + "f1": 0.4399980836214573, + "f1_weighted": 0.5740811958220885 + }, + { + "accuracy": 0.6566666666666666, + "f1": 0.4498746036443266, + "f1_weighted": 0.5843191885136169 + } + ] + }, + { + "accuracy": 0.68075, + "f1": 0.660155025408092, + "f1_weighted": 0.6759930020418418, + "hf_subset": "jav", + "languages": [ + "jav-Latn" + ], + "main_score": 0.660155025408092, + "scores_per_experiment": [ + { + "accuracy": 0.70375, + "f1": 0.6847649326728814, + "f1_weighted": 0.7077673546461796 + }, + { + "accuracy": 0.69125, + "f1": 0.6686803719870673, + "f1_weighted": 0.6880433606719385 + }, + { + "accuracy": 0.7, + "f1": 0.6784226413470056, + "f1_weighted": 0.6869195249472552 + }, + { + "accuracy": 0.6575, + "f1": 0.6350688729596057, + "f1_weighted": 0.6555931333347271 + }, + { + "accuracy": 0.62875, + "f1": 0.59445380923915, + "f1_weighted": 0.6117551600143775 + }, + { + "accuracy": 0.66875, + "f1": 0.6527602624386722, + "f1_weighted": 0.6653199559754615 + }, + { + "accuracy": 0.70625, + "f1": 0.6883303308128185, + "f1_weighted": 0.7058423615373113 + }, + { + "accuracy": 0.67125, + "f1": 0.6510035114005515, + "f1_weighted": 0.6642991582996877 + }, + { + "accuracy": 0.69625, + "f1": 0.6805498011412016, + "f1_weighted": 0.687553555196325 + }, + { + "accuracy": 0.68375, + "f1": 0.6675157200819667, + "f1_weighted": 0.6868364557951552 + } + ] + }, + { + "accuracy": 0.638, + "f1": 0.5995558948750676, + "f1_weighted": 0.6412866525067649, + "hf_subset": "mad", + "languages": [ + "mad-Latn" + ], + "main_score": 0.5995558948750676, + "scores_per_experiment": [ + { + "accuracy": 0.5742857142857143, + "f1": 0.5035475665060216, + "f1_weighted": 0.5649910543100422 + }, + { + "accuracy": 0.6442857142857142, + "f1": 0.6004080345307737, + "f1_weighted": 0.6551125552976121 + }, + { + "accuracy": 0.6457142857142857, + "f1": 0.6040474022035318, + "f1_weighted": 0.635934183231404 + }, + { + "accuracy": 0.6485714285714286, + "f1": 0.631406759176383, + "f1_weighted": 0.6548543594697213 + }, + { + "accuracy": 0.6471428571428571, + "f1": 0.6191920933012576, + "f1_weighted": 0.6581171550936497 + }, + { + "accuracy": 0.6428571428571429, + "f1": 0.6073610653229178, + "f1_weighted": 0.6452590930035225 + }, + { + "accuracy": 0.6285714285714286, + "f1": 0.5874675876369662, + "f1_weighted": 0.6258355550566823 + }, + { + "accuracy": 0.6857142857142857, + "f1": 
0.6544493651670709, + "f1_weighted": 0.6907755482506079 + }, + { + "accuracy": 0.6457142857142857, + "f1": 0.6082372395298248, + "f1_weighted": 0.6587541063110648 + }, + { + "accuracy": 0.6171428571428571, + "f1": 0.5794418353759277, + "f1_weighted": 0.6232329150433427 + } + ] + }, + { + "accuracy": 0.6724285714285714, + "f1": 0.6511835482101187, + "f1_weighted": 0.6755249039430138, + "hf_subset": "mak", + "languages": [ + "mak-Latn" + ], + "main_score": 0.6511835482101187, + "scores_per_experiment": [ + { + "accuracy": 0.69, + "f1": 0.6717625788507542, + "f1_weighted": 0.6948107797216528 + }, + { + "accuracy": 0.6228571428571429, + "f1": 0.5984127933457115, + "f1_weighted": 0.6153350611191378 + }, + { + "accuracy": 0.6128571428571429, + "f1": 0.5952993377079482, + "f1_weighted": 0.6278027362558014 + }, + { + "accuracy": 0.6828571428571428, + "f1": 0.6590623678306446, + "f1_weighted": 0.6920721246352323 + }, + { + "accuracy": 0.7028571428571428, + "f1": 0.6765364771824226, + "f1_weighted": 0.7109664301625358 + }, + { + "accuracy": 0.6542857142857142, + "f1": 0.6381157901178615, + "f1_weighted": 0.6543146080123529 + }, + { + "accuracy": 0.69, + "f1": 0.6644927910614913, + "f1_weighted": 0.6941090188850209 + }, + { + "accuracy": 0.6985714285714286, + "f1": 0.671264262718122, + "f1_weighted": 0.6967453299707091 + }, + { + "accuracy": 0.66, + "f1": 0.6447745208364051, + "f1_weighted": 0.6640419778663437 + }, + { + "accuracy": 0.71, + "f1": 0.6921145624498259, + "f1_weighted": 0.7050509728013513 + } + ] + }, + { + "accuracy": 0.6964999999999999, + "f1": 0.6830295857444562, + "f1_weighted": 0.6915282347216338, + "hf_subset": "min", + "languages": [ + "min-Latn" + ], + "main_score": 0.6830295857444562, + "scores_per_experiment": [ + { + "accuracy": 0.68375, + "f1": 0.6702457537594522, + "f1_weighted": 0.6770744409139724 + }, + { + "accuracy": 0.71375, + "f1": 0.7014287021765294, + "f1_weighted": 0.7114548367533642 + }, + { + "accuracy": 0.68875, + "f1": 0.683918919282406, + "f1_weighted": 0.6859656175643266 + }, + { + "accuracy": 0.67875, + "f1": 0.6568105197110642, + "f1_weighted": 0.6691392787635434 + }, + { + "accuracy": 0.68375, + "f1": 0.6739963016587547, + "f1_weighted": 0.6845920477631463 + }, + { + "accuracy": 0.6825, + "f1": 0.6690417735157195, + "f1_weighted": 0.6757194311478294 + }, + { + "accuracy": 0.7225, + "f1": 0.7115950196187414, + "f1_weighted": 0.7184767185842589 + }, + { + "accuracy": 0.72125, + "f1": 0.7038165870974379, + "f1_weighted": 0.709298748442053 + }, + { + "accuracy": 0.68375, + "f1": 0.6652037875745539, + "f1_weighted": 0.6800049354748845 + }, + { + "accuracy": 0.70625, + "f1": 0.6942384930499023, + "f1_weighted": 0.7035562918089598 + } + ] + }, + { + "accuracy": 0.78625, + "f1": 0.7847577681704051, + "f1_weighted": 0.7827707451596769, + "hf_subset": "mui", + "languages": [ + "mui-Latn" + ], + "main_score": 0.7847577681704051, + "scores_per_experiment": [ + { + "accuracy": 0.8175, + "f1": 0.8136792768116344, + "f1_weighted": 0.8125701012207158 + }, + { + "accuracy": 0.77, + "f1": 0.7639447042089006, + "f1_weighted": 0.769491944554763 + }, + { + "accuracy": 0.765, + "f1": 0.760912949290902, + "f1_weighted": 0.7600984243783456 + }, + { + "accuracy": 0.7825, + "f1": 0.7813158754323395, + "f1_weighted": 0.7804007528036853 + }, + { + "accuracy": 0.7775, + "f1": 0.7783410732714138, + "f1_weighted": 0.7734143262568185 + }, + { + "accuracy": 0.8225, + "f1": 0.8237359479743818, + "f1_weighted": 0.8220814973595704 + }, + { + "accuracy": 0.815, + "f1": 0.8128262921702479, + 
"f1_weighted": 0.8106348212952583 + }, + { + "accuracy": 0.7775, + "f1": 0.7762081734572419, + "f1_weighted": 0.7741545910545169 + }, + { + "accuracy": 0.7775, + "f1": 0.7806474552515829, + "f1_weighted": 0.7733772607540079 + }, + { + "accuracy": 0.7575, + "f1": 0.7559659338354059, + "f1_weighted": 0.7514837319190876 + } + ] + }, + { + "accuracy": 0.7497142857142858, + "f1": 0.6618562226776146, + "f1_weighted": 0.7367486723643226, + "hf_subset": "rej", + "languages": [ + "rej-Latn" + ], + "main_score": 0.6618562226776146, + "scores_per_experiment": [ + { + "accuracy": 0.7371428571428571, + "f1": 0.6510866939036262, + "f1_weighted": 0.7239660803516531 + }, + { + "accuracy": 0.7628571428571429, + "f1": 0.6736682146254387, + "f1_weighted": 0.7515286713548246 + }, + { + "accuracy": 0.7542857142857143, + "f1": 0.6611847664353725, + "f1_weighted": 0.7389809391291607 + }, + { + "accuracy": 0.7514285714285714, + "f1": 0.6619694132559071, + "f1_weighted": 0.7411733361603973 + }, + { + "accuracy": 0.76, + "f1": 0.6756188990984007, + "f1_weighted": 0.7483000209864257 + }, + { + "accuracy": 0.76, + "f1": 0.6669734883264213, + "f1_weighted": 0.7446652254400593 + }, + { + "accuracy": 0.7342857142857143, + "f1": 0.6479386702247432, + "f1_weighted": 0.720606529215327 + }, + { + "accuracy": 0.7485714285714286, + "f1": 0.6623596989128226, + "f1_weighted": 0.7374543104494233 + }, + { + "accuracy": 0.7228571428571429, + "f1": 0.6421795712432292, + "f1_weighted": 0.7039382185066186 + }, + { + "accuracy": 0.7657142857142857, + "f1": 0.6755828107501833, + "f1_weighted": 0.7568733920493368 + } + ] + }, + { + "accuracy": 0.7243333333333334, + "f1": 0.7114371845316356, + "f1_weighted": 0.7194217975104253, + "hf_subset": "sun", + "languages": [ + "sun-Latn" + ], + "main_score": 0.7114371845316356, + "scores_per_experiment": [ + { + "accuracy": 0.7211111111111111, + "f1": 0.7032167463220967, + "f1_weighted": 0.7150200350748216 + }, + { + "accuracy": 0.73, + "f1": 0.720996646120255, + "f1_weighted": 0.7298742341967669 + }, + { + "accuracy": 0.7011111111111111, + "f1": 0.681590025980304, + "f1_weighted": 0.6949043469007721 + }, + { + "accuracy": 0.7011111111111111, + "f1": 0.6862001242253536, + "f1_weighted": 0.6967260831325527 + }, + { + "accuracy": 0.7744444444444445, + "f1": 0.7686245429585276, + "f1_weighted": 0.7696565484178968 + }, + { + "accuracy": 0.7377777777777778, + "f1": 0.7298825211940584, + "f1_weighted": 0.7401893793081832 + }, + { + "accuracy": 0.6766666666666666, + "f1": 0.6649937099597808, + "f1_weighted": 0.6719085635743007 + }, + { + "accuracy": 0.7444444444444445, + "f1": 0.7289737857535564, + "f1_weighted": 0.734925881023402 + }, + { + "accuracy": 0.7633333333333333, + "f1": 0.7527381552976653, + "f1_weighted": 0.7609444235963128 + }, + { + "accuracy": 0.6933333333333334, + "f1": 0.6771555875047582, + "f1_weighted": 0.6800684798792455 + } + ] + } + ] + }, + "task_name": "NusaParagraphTopicClassification" +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphTopicClassification.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphTopicClassification.json new file mode 100644 index 0000000000..3628506b52 --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphTopicClassification.json @@ -0,0 +1,631 @@ +{ + "dataset_revision": 
"abb43f8d5b9510b8724b48283aca26c4733eac5d", + "evaluation_time": 21.640820264816284, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "test": [ + { + "accuracy": 0.502, + "f1": 0.49583124915326754, + "f1_weighted": 0.5049909818870219, + "hf_subset": "btk", + "languages": [ + "bbc-Latn" + ], + "main_score": 0.49583124915326754, + "scores_per_experiment": [ + { + "accuracy": 0.494, + "f1": 0.4923836681373438, + "f1_weighted": 0.502604128657887 + }, + { + "accuracy": 0.486, + "f1": 0.4957754504845821, + "f1_weighted": 0.49212831242694527 + }, + { + "accuracy": 0.498, + "f1": 0.48527722327799533, + "f1_weighted": 0.4987207967306596 + }, + { + "accuracy": 0.532, + "f1": 0.5146508810932297, + "f1_weighted": 0.5245154025413195 + }, + { + "accuracy": 0.516, + "f1": 0.5022581319237363, + "f1_weighted": 0.5076281198283419 + }, + { + "accuracy": 0.522, + "f1": 0.5161932075375283, + "f1_weighted": 0.5252784631941385 + }, + { + "accuracy": 0.494, + "f1": 0.4925507028978733, + "f1_weighted": 0.5096481832965343 + }, + { + "accuracy": 0.52, + "f1": 0.5058674282430002, + "f1_weighted": 0.5219240914691716 + }, + { + "accuracy": 0.474, + "f1": 0.46950643124956726, + "f1_weighted": 0.4886392403843932 + }, + { + "accuracy": 0.484, + "f1": 0.4838493666878183, + "f1_weighted": 0.4788230803408273 + } + ] + }, + { + "accuracy": 0.623875, + "f1": 0.609485091619269, + "f1_weighted": 0.6323509147743607, + "hf_subset": "bew", + "languages": [ + "bew-Latn" + ], + "main_score": 0.609485091619269, + "scores_per_experiment": [ + { + "accuracy": 0.5975, + "f1": 0.5916211091826291, + "f1_weighted": 0.617688274045643 + }, + { + "accuracy": 0.62875, + "f1": 0.6166869918521367, + "f1_weighted": 0.6466030104992363 + }, + { + "accuracy": 0.6675, + "f1": 0.6508769667597738, + "f1_weighted": 0.6668098124434177 + }, + { + "accuracy": 0.66625, + "f1": 0.6451817470042513, + "f1_weighted": 0.6709164840710699 + }, + { + "accuracy": 0.58875, + "f1": 0.5779693271763158, + "f1_weighted": 0.5950794139326674 + }, + { + "accuracy": 0.635, + "f1": 0.6212669930801686, + "f1_weighted": 0.6482536445815563 + }, + { + "accuracy": 0.6225, + "f1": 0.6056062227188277, + "f1_weighted": 0.6362560522316104 + }, + { + "accuracy": 0.6025, + "f1": 0.582070219968576, + "f1_weighted": 0.60118405063164 + }, + { + "accuracy": 0.5675, + "f1": 0.5536907573182387, + "f1_weighted": 0.5691135337806049 + }, + { + "accuracy": 0.6625, + "f1": 0.6498805811317716, + "f1_weighted": 0.6716048715261607 + } + ] + }, + { + "accuracy": 0.5686666666666668, + "f1": 0.5184796739036743, + "f1_weighted": 0.5576343730542004, + "hf_subset": "bug", + "languages": [ + "bug-Latn" + ], + "main_score": 0.5184796739036743, + "scores_per_experiment": [ + { + "accuracy": 0.5666666666666667, + "f1": 0.5091118896430855, + "f1_weighted": 0.5529787709918204 + }, + { + "accuracy": 0.5533333333333333, + "f1": 0.5102326371014173, + "f1_weighted": 0.5495934250015552 + }, + { + "accuracy": 0.55, + "f1": 0.5017254073738868, + "f1_weighted": 0.5435945549599398 + }, + { + "accuracy": 0.5966666666666667, + "f1": 0.5334320550375442, + "f1_weighted": 0.5786127152016327 + }, + { + "accuracy": 0.5566666666666666, + "f1": 0.5204244932192152, + "f1_weighted": 0.5452414686554135 + }, + { + "accuracy": 0.5633333333333334, + "f1": 0.5216067410039114, + "f1_weighted": 0.5540161648606108 + }, + { + "accuracy": 0.5933333333333334, + "f1": 0.5326338751019488, + "f1_weighted": 0.5819798352602745 + }, + { + "accuracy": 0.58, + "f1": 0.5250692251271024, + "f1_weighted": 0.5642819856464932 + }, + 
{ + "accuracy": 0.5566666666666666, + "f1": 0.5148036197321377, + "f1_weighted": 0.5493701772027021 + }, + { + "accuracy": 0.57, + "f1": 0.515756795696493, + "f1_weighted": 0.5566746327615613 + } + ] + }, + { + "accuracy": 0.498625, + "f1": 0.484702668509168, + "f1_weighted": 0.5080896250345485, + "hf_subset": "jav", + "languages": [ + "jav-Latn" + ], + "main_score": 0.484702668509168, + "scores_per_experiment": [ + { + "accuracy": 0.47625, + "f1": 0.46738222141513686, + "f1_weighted": 0.48301727833616664 + }, + { + "accuracy": 0.485, + "f1": 0.4753544376312615, + "f1_weighted": 0.4953950215027018 + }, + { + "accuracy": 0.49625, + "f1": 0.48459195024714796, + "f1_weighted": 0.5015912846827465 + }, + { + "accuracy": 0.52625, + "f1": 0.5115138176602833, + "f1_weighted": 0.5383827968174302 + }, + { + "accuracy": 0.495, + "f1": 0.4804018819929509, + "f1_weighted": 0.5053251777500593 + }, + { + "accuracy": 0.51625, + "f1": 0.49999253357728735, + "f1_weighted": 0.517854193724271 + }, + { + "accuracy": 0.4625, + "f1": 0.45619860700438875, + "f1_weighted": 0.4871324617537307 + }, + { + "accuracy": 0.49375, + "f1": 0.4718442332968104, + "f1_weighted": 0.5033444838913722 + }, + { + "accuracy": 0.5175, + "f1": 0.5013059251492795, + "f1_weighted": 0.5248308426522561 + }, + { + "accuracy": 0.5175, + "f1": 0.49844107711713403, + "f1_weighted": 0.5240227092347505 + } + ] + }, + { + "accuracy": 0.4578571428571429, + "f1": 0.4293434328627389, + "f1_weighted": 0.474337380884348, + "hf_subset": "mad", + "languages": [ + "mad-Latn" + ], + "main_score": 0.4293434328627389, + "scores_per_experiment": [ + { + "accuracy": 0.4142857142857143, + "f1": 0.36830308087843344, + "f1_weighted": 0.4231514842820036 + }, + { + "accuracy": 0.4757142857142857, + "f1": 0.4311602002297987, + "f1_weighted": 0.4869944417098914 + }, + { + "accuracy": 0.43142857142857144, + "f1": 0.3963653473783615, + "f1_weighted": 0.4346653723331879 + }, + { + "accuracy": 0.4442857142857143, + "f1": 0.4315235896654613, + "f1_weighted": 0.4793797268903201 + }, + { + "accuracy": 0.44571428571428573, + "f1": 0.4313647527747436, + "f1_weighted": 0.4676159906990612 + }, + { + "accuracy": 0.44, + "f1": 0.4159285938894658, + "f1_weighted": 0.44753026080079944 + }, + { + "accuracy": 0.4785714285714286, + "f1": 0.46335781606938115, + "f1_weighted": 0.5037925549493945 + }, + { + "accuracy": 0.5228571428571429, + "f1": 0.49904675560668343, + "f1_weighted": 0.5389392474919658 + }, + { + "accuracy": 0.47285714285714286, + "f1": 0.4427125121202358, + "f1_weighted": 0.48814505670401 + }, + { + "accuracy": 0.45285714285714285, + "f1": 0.41367168001482396, + "f1_weighted": 0.4731596729828466 + } + ] + }, + { + "accuracy": 0.5084285714285715, + "f1": 0.48917602380236913, + "f1_weighted": 0.5202075190662195, + "hf_subset": "mak", + "languages": [ + "mak-Latn" + ], + "main_score": 0.48917602380236913, + "scores_per_experiment": [ + { + "accuracy": 0.5128571428571429, + "f1": 0.4902509671033717, + "f1_weighted": 0.5251142347684558 + }, + { + "accuracy": 0.5128571428571429, + "f1": 0.4911172384032806, + "f1_weighted": 0.5173883280980776 + }, + { + "accuracy": 0.45285714285714285, + "f1": 0.4405715557485829, + "f1_weighted": 0.4701317315942821 + }, + { + "accuracy": 0.49714285714285716, + "f1": 0.4797791249551845, + "f1_weighted": 0.5097664107503609 + }, + { + "accuracy": 0.5514285714285714, + "f1": 0.5186223018361971, + "f1_weighted": 0.55930276399965 + }, + { + "accuracy": 0.47714285714285715, + "f1": 0.46146332809137885, + "f1_weighted": 0.49200874830422736 + }, + { 
+ "accuracy": 0.5157142857142857, + "f1": 0.5059612429055371, + "f1_weighted": 0.5335933140210743 + }, + { + "accuracy": 0.5385714285714286, + "f1": 0.5068910053350097, + "f1_weighted": 0.5453236601032398 + }, + { + "accuracy": 0.49142857142857144, + "f1": 0.48385599556216274, + "f1_weighted": 0.4992476422558179 + }, + { + "accuracy": 0.5342857142857143, + "f1": 0.5132474780829864, + "f1_weighted": 0.5501983567670095 + } + ] + }, + { + "accuracy": 0.599125, + "f1": 0.592963903953233, + "f1_weighted": 0.6033851196187611, + "hf_subset": "min", + "languages": [ + "min-Latn" + ], + "main_score": 0.592963903953233, + "scores_per_experiment": [ + { + "accuracy": 0.59125, + "f1": 0.5827173261341706, + "f1_weighted": 0.596372571213758 + }, + { + "accuracy": 0.63, + "f1": 0.6165168681838411, + "f1_weighted": 0.6321415731411145 + }, + { + "accuracy": 0.59125, + "f1": 0.5882802011720217, + "f1_weighted": 0.5975118239207026 + }, + { + "accuracy": 0.59375, + "f1": 0.5780553154747631, + "f1_weighted": 0.5957930597057401 + }, + { + "accuracy": 0.565, + "f1": 0.5777521634408125, + "f1_weighted": 0.577006119572099 + }, + { + "accuracy": 0.56875, + "f1": 0.5735399420881627, + "f1_weighted": 0.5712645373956117 + }, + { + "accuracy": 0.64625, + "f1": 0.6247343172455967, + "f1_weighted": 0.6462731212642467 + }, + { + "accuracy": 0.64, + "f1": 0.6333319506660035, + "f1_weighted": 0.6425678101934124 + }, + { + "accuracy": 0.58625, + "f1": 0.5789297989013357, + "f1_weighted": 0.5919365192699684 + }, + { + "accuracy": 0.57875, + "f1": 0.5757811562256222, + "f1_weighted": 0.5829840605109579 + } + ] + }, + { + "accuracy": 0.70775, + "f1": 0.7095409755484919, + "f1_weighted": 0.7087379049270817, + "hf_subset": "mui", + "languages": [ + "mui-Latn" + ], + "main_score": 0.7095409755484919, + "scores_per_experiment": [ + { + "accuracy": 0.7225, + "f1": 0.7181470763951315, + "f1_weighted": 0.7193019953884903 + }, + { + "accuracy": 0.705, + "f1": 0.7051496104917943, + "f1_weighted": 0.7096501532593039 + }, + { + "accuracy": 0.715, + "f1": 0.716795819197157, + "f1_weighted": 0.716824773006319 + }, + { + "accuracy": 0.6975, + "f1": 0.6994367470218236, + "f1_weighted": 0.6974375682615824 + }, + { + "accuracy": 0.7225, + "f1": 0.7218389671504905, + "f1_weighted": 0.722994535231039 + }, + { + "accuracy": 0.7, + "f1": 0.7063910617310575, + "f1_weighted": 0.7032539841645798 + }, + { + "accuracy": 0.7075, + "f1": 0.7159542679163657, + "f1_weighted": 0.7087279623828745 + }, + { + "accuracy": 0.6825, + "f1": 0.6888119175399933, + "f1_weighted": 0.6828403776671707 + }, + { + "accuracy": 0.705, + "f1": 0.7041029633721217, + "f1_weighted": 0.7057358718482053 + }, + { + "accuracy": 0.72, + "f1": 0.7187813246689843, + "f1_weighted": 0.7206118280612531 + } + ] + }, + { + "accuracy": 0.65, + "f1": 0.6511645155269374, + "f1_weighted": 0.652331326218501, + "hf_subset": "rej", + "languages": [ + "rej-Latn" + ], + "main_score": 0.6511645155269374, + "scores_per_experiment": [ + { + "accuracy": 0.6771428571428572, + "f1": 0.675208982723501, + "f1_weighted": 0.6777449602546418 + }, + { + "accuracy": 0.6542857142857142, + "f1": 0.6500828083159753, + "f1_weighted": 0.6569148737908412 + }, + { + "accuracy": 0.6314285714285715, + "f1": 0.6348749406891112, + "f1_weighted": 0.6293023000727321 + }, + { + "accuracy": 0.6828571428571428, + "f1": 0.6807494517074276, + "f1_weighted": 0.6858182469697525 + }, + { + "accuracy": 0.64, + "f1": 0.6497710882318689, + "f1_weighted": 0.6463249580751427 + }, + { + "accuracy": 0.64, + "f1": 0.6416243820254892, + 
"f1_weighted": 0.6429522457917923 + }, + { + "accuracy": 0.64, + "f1": 0.6391267680157248, + "f1_weighted": 0.6442285014250654 + }, + { + "accuracy": 0.6685714285714286, + "f1": 0.6772072785668375, + "f1_weighted": 0.6744002548060357 + }, + { + "accuracy": 0.64, + "f1": 0.6280601382517603, + "f1_weighted": 0.6372182741711646 + }, + { + "accuracy": 0.6257142857142857, + "f1": 0.6349393167416787, + "f1_weighted": 0.6284086468278409 + } + ] + }, + { + "accuracy": 0.5413333333333333, + "f1": 0.5376816868659201, + "f1_weighted": 0.5510306569469691, + "hf_subset": "sun", + "languages": [ + "sun-Latn" + ], + "main_score": 0.5376816868659201, + "scores_per_experiment": [ + { + "accuracy": 0.5155555555555555, + "f1": 0.5124935121451668, + "f1_weighted": 0.5257431334680848 + }, + { + "accuracy": 0.5522222222222222, + "f1": 0.5505331734375645, + "f1_weighted": 0.5641846244150112 + }, + { + "accuracy": 0.5522222222222222, + "f1": 0.5471253203820344, + "f1_weighted": 0.567516360357803 + }, + { + "accuracy": 0.54, + "f1": 0.5288053651337596, + "f1_weighted": 0.5406112101952358 + }, + { + "accuracy": 0.5288888888888889, + "f1": 0.5330090553749861, + "f1_weighted": 0.5408310949835916 + }, + { + "accuracy": 0.5577777777777778, + "f1": 0.5595453801208847, + "f1_weighted": 0.5712989239500295 + }, + { + "accuracy": 0.5033333333333333, + "f1": 0.5009258996416388, + "f1_weighted": 0.5113015355658886 + }, + { + "accuracy": 0.5622222222222222, + "f1": 0.5608093274440646, + "f1_weighted": 0.5735566083414523 + }, + { + "accuracy": 0.5444444444444444, + "f1": 0.5383340120878801, + "f1_weighted": 0.5539581073844755 + }, + { + "accuracy": 0.5566666666666666, + "f1": 0.5452358228912215, + "f1_weighted": 0.561304970808119 + } + ] + } + ] + }, + "task_name": "NusaParagraphTopicClassification" +} \ No newline at end of file From 4a8bfd2cd9b1acc2d14bf4a45b84719d57104330 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 15 Jun 2024 20:54:00 +0000 Subject: [PATCH 4/9] Update points table --- docs/mmteb/points_table.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/mmteb/points_table.md b/docs/mmteb/points_table.md index 29f9d700db..1a4a1c12eb 100644 --- a/docs/mmteb/points_table.md +++ b/docs/mmteb/points_table.md @@ -4,7 +4,7 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended | GitHub | New dataset | Review PR | Coordination | Bug fixes | Dataset annotations | Running Models | Paper writing | New task | Total | |:------------------|--------------:|------------:|---------------:|------------:|----------------------:|-----------------:|----------------:|-----------:|--------:| -| KennethEnevoldsen | 68 | 258 | 11 | 81 | 35 | 0 | 0 | 0 | 453 | +| KennethEnevoldsen | 68 | 260 | 11 | 81 | 35 | 0 | 0 | 0 | 455 | | isaac-chung | 116 | 182 | 4 | 40 | 1 | 0 | 4 | 0 | 347 | | awinml | 292 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 294 | | imenelydiaker | 120 | 140 | 0 | 20 | 0 | 0 | 0 | 0 | 280 | @@ -14,11 +14,11 @@ _Note_: this table is **autogenerated** and should not be edited. 
It is intended | wissam-sib | 134 | 6 | 0 | 4 | 0 | 0 | 0 | 0 | 144 | | jupyterjazz | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 108 | | SaitejaUtpala | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 102 | +| gentaiscool | 100 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 100 | | dokato | 82 | 4 | 0 | 8 | 0 | 0 | 0 | 0 | 94 | -| MathieuCiancone | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | | schmarion | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | +| MathieuCiancone | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | | GabrielSequeira | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | -| gentaiscool | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 82 | | digantamisra98 | 71 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 71 | | shreeya-dhakal | 54 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 62 | | Rysias | 58 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 58 | @@ -30,23 +30,23 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended | rafalposwiata | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | | bp-high | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | | akshita-sukhlecha | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 34 | -| ShawonAshraf | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | | jphme | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | | rasdani | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | +| ShawonAshraf | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | | loicmagne | 0 | 0 | 0 | 28 | 0 | 0 | 0 | 0 | 28 | | bjoernpl | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | | violenil | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26 | | kranthigv | 20 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 26 | | dwzhu-pku | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | -| taeminlee | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | | jankounchained | 14 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 22 | +| taeminlee | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | | crystina-z | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | -| mrshu | 16 | 4 | 0 | 0 | 1 | 0 | 0 | 0 | 21 | | hgissbkh | 0 | 0 | 0 | 13 | 0 | 0 | 3 | 5 | 21 | +| mrshu | 16 | 4 | 0 | 0 | 1 | 0 | 0 | 0 | 21 | +| mmhamdy | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | | rbroc | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | -| Andrian0s | 14 | 4 | 0 | 2 | 0 | 0 | 0 | 0 | 20 | | AlexeyVatolin | 0 | 0 | 0 | 20 | 0 | 0 | 0 | 0 | 20 | -| mmhamdy | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | +| Andrian0s | 14 | 4 | 0 | 2 | 0 | 0 | 0 | 0 | 20 | | ManuelFay | 2 | 0 | 0 | 13 | 0 | 0 | 0 | 5 | 20 | | manandey | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | | MartinBernstorff | 2 | 8 | 0 | 7 | 0 | 0 | 0 | 0 | 17 | From 0f5181945db20410e9d1f3af3be2060b411aadb4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 15 Jun 2024 20:58:25 +0000 Subject: [PATCH 5/9] Update tasks table --- docs/tasks.md | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/tasks.md b/docs/tasks.md index b961fba0b2..b68239bf5e 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -332,10 +332,12 @@ The following tables give you an overview of the tasks in MTEB. 
| [News21InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News] | {'eng': 61906} | {'eng': 2983.724665391969} | | [NewsClassification](https://arxiv.org/abs/1509.01626) (Zhang et al., 2015) | ['eng'] | Classification | s2s | [News] | {'test': 7600} | {'test': 235.29} | | [NoRecClassification](https://aclanthology.org/L18-1661/) | ['nob'] | Classification | s2s | | {'test': 2050} | {'test': 82.0} | +| [NollySentiBitextMining](https://github.com/IyanuSh/NollySenti) (Shode et al., 2023) | ['eng', 'hau', 'ibo', 'pcm', 'yor'] | BitextMining | s2s | [Social, Reviews] | {'train': 1640} | {'train': 135.91} | | [NorQuadRetrieval](https://aclanthology.org/2023.nodalida-1.17/) | ['nob'] | Retrieval | p2p | [Encyclopaedic, Non-fiction] | {'test': 2602} | {'test': 502.19} | | [NordicLangClassification](https://aclanthology.org/2021.vardial-1.8/) | ['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] | Classification | s2s | | {'test': 3000} | {'test': 78.2} | | [NorwegianCourtsBitextMining](https://opus.nlpl.eu/index.php) (Tiedemann et al., 2020) | ['nno', 'nob'] | BitextMining | s2s | [Legal] | {'test': 2050} | {'test': 1884.0} | | [NorwegianParliamentClassification](https://huggingface.co/datasets/NbAiLab/norwegian_parliament) | ['nob'] | Classification | s2s | | {'test': 1200, 'validation': 1200} | {'test': 1884.0, 'validation': 1911.0} | +| [NusaParagraphTopicClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | Classification | s2s | [Non-fiction, Fiction] | {'train': 15516, 'validation': 2948, 'test': 6250} | {'train': 740.24, 'validation': 740.66, 'test': 740.71} | | [NusaTranslationBitextMining](https://huggingface.co/datasets/indonlp/nusatranslation_mt) (Cahyawijaya et al., 2023) | ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | BitextMining | s2s | [Social] | {'train': 50200} | {'train': 147.01} | | [NusaX-senti](https://arxiv.org/abs/2205.15960) (Winata et al., 2022) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | Classification | s2s | [Reviews, Web, Social, Constructed] | {'test': 4800} | {'test': 52.4} | | [NusaXBitextMining](https://huggingface.co/datasets/indonlp/NusaX-senti/) (Winata et al., 2023) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | BitextMining | s2s | [Reviews] | {'train': 5500} | {'train': 157.15} | @@ -642,7 +644,7 @@ The following tables give you an overview of the tasks in MTEB. | bao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bba | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbc | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| bbc | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -655,7 +657,7 @@ The following tables give you an overview of the tasks in MTEB. | beo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | ber | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | beu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| bew | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bgc | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bgs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bgt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -703,7 +705,7 @@ The following tables give you an overview of the tasks in MTEB. 
| bsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bsp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bug | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| bug | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | buk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bul | 3 | 5 | 0 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | | bus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -821,7 +823,7 @@ The following tables give you an overview of the tasks in MTEB. | ell | 3 | 7 | 0 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | | emi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | emp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eng | 13 | 144 | 15 | 3 | 1 | 8 | 7 | 54 | 13 | 1 | +| eng | 14 | 144 | 15 | 3 | 1 | 8 | 7 | 54 | 13 | 1 | | enq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | epo | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | eri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -893,7 +895,7 @@ The following tables give you an overview of the tasks in MTEB. | gym | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | gyr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | hat | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | -| hau | 3 | 6 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | +| hau | 4 | 6 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | | haw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | hbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | hch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -921,7 +923,7 @@ The following tables give you an overview of the tasks in MTEB. | hvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | hye | 3 | 4 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | | ian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ibo | 2 | 6 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | +| ibo | 3 | 6 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | ido | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | ign | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | ikk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -943,7 +945,7 @@ The following tables give you an overview of the tasks in MTEB. | jac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jae | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jav | 4 | 6 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | +| jav | 4 | 7 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | jic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1068,11 +1070,11 @@ The following tables give you an overview of the tasks in MTEB. | lvs | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | lww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | maa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mad | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| mad | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mag | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mai | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | maj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| mak | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mal | 7 | 8 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | | mam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | maq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1115,7 +1117,7 @@ The following tables give you an overview of the tasks in MTEB. | mig | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mih | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mil | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| min | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| min | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1157,7 +1159,7 @@ The following tables give you an overview of the tasks in MTEB. 
| msy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mti | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| mui | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mup | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | muy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1265,7 +1267,7 @@ The following tables give you an overview of the tasks in MTEB. | pao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | pap | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | pbt | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | -| pcm | 0 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| pcm | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | pes | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | pib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | pio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1314,7 +1316,7 @@ The following tables give you an overview of the tasks in MTEB. | rai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | raj | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | reg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rej | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| rej | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | rgu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | rkb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | rmc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1387,7 +1389,7 @@ The following tables give you an overview of the tasks in MTEB. | stp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | sua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | sue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sun | 3 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | +| sun | 3 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | sus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | suz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | svk | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1549,7 +1551,7 @@ The following tables give you an overview of the tasks in MTEB. | yle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | yml | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | yon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yor | 3 | 6 | 2 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | +| yor | 4 | 6 | 2 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | | yrb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | yre | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | yss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1593,7 +1595,7 @@ The following tables give you an overview of the tasks in MTEB. 
| zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | zul | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| Total | 1385 | 970 | 107 | 3 | 28 | 67 | 46 | 335 | 85 | 2 | +| Total | 1390 | 980 | 107 | 3 | 28 | 67 | 46 | 335 | 85 | 2 | From 5e4ad442535a20719f42457ca034269df3de3646 Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Sat, 15 Jun 2024 16:58:46 -0400 Subject: [PATCH 6/9] fix: Add NusaParagraph Emotion Classification (#928) * add NusaTranslationEmotionClassification * update name * add new task * add new task * Update mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py Co-authored-by: Kenneth Enevoldsen * add sizes * add point * add results * update desc --------- Co-authored-by: Kenneth Enevoldsen --- docs/mmteb/points/928.jsonl | 2 + mteb/abstasks/TaskMetadata.py | 1 + mteb/tasks/Classification/__init__.py | 1 + .../NusaParagraphEmotionClassification.py | 60 ++ .../NusaParagraphEmotionClassification.json | 631 ++++++++++++++++++ .../NusaParagraphEmotionClassification.json | 631 ++++++++++++++++++ 6 files changed, 1326 insertions(+) create mode 100644 docs/mmteb/points/928.jsonl create mode 100644 mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py create mode 100644 results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json create mode 100644 results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json diff --git a/docs/mmteb/points/928.jsonl b/docs/mmteb/points/928.jsonl new file mode 100644 index 0000000000..5c807f432f --- /dev/null +++ b/docs/mmteb/points/928.jsonl @@ -0,0 +1,2 @@ +{"GitHub": "gentaiscool", "New dataset": 2} +{"GitHub": "KennethEnevoldsen", "Review PR": 2} \ No newline at end of file diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 2784491b7a..c81f302163 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -35,6 +35,7 @@ "Cross-Lingual Semantic Discrimination", "Textual Entailment", "Counterfactual Detection", + "Emotion classification", ] TASK_DOMAIN = Literal[ diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py index 9559feeb80..529ce5cba6 100644 --- a/mteb/tasks/Classification/__init__.py +++ b/mteb/tasks/Classification/__init__.py @@ -86,6 +86,7 @@ from .multilingual.MultilingualSentimentClassification import * from .multilingual.NaijaSenti import * from .multilingual.NordicLangClassification import * +from .multilingual.NusaParagraphEmotionClassification import * from .multilingual.NusaParagraphTopicClassification import * from .multilingual.NusaXSenti import * from .multilingual.ScalaClassification import * diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py new file mode 100644 index 0000000000..1a565764ef --- /dev/null +++ b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks import AbsTaskClassification, MultilingualTask + +_LANGUAGES = { + "btk": ["bbc-Latn"], + "bew": ["bew-Latn"], + "bug": ["bug-Latn"], + "jav": ["jav-Latn"], + "mad": ["mad-Latn"], + "mak": ["mak-Latn"], + "min": ["min-Latn"], + "mui": ["mui-Latn"], + "rej": ["rej-Latn"], + 
"sun": ["sun-Latn"], +} + + +class NusaParagraphEmotionClassification(MultilingualTask, AbsTaskClassification): + metadata = TaskMetadata( + name="NusaParagraphEmotionClassification", + dataset={ + "path": "gentaiscool/nusaparagraph_emot", + "revision": "c61e8c3ee47d2dce296e9601195916b54c21d575", + }, + description="NusaParagraphEmotionClassification is a multi-class emotion classification on 10 Indonesian languages from the NusaParagraph dataset.", + reference="https://github.com/IndoNLP/nusa-writes", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=_LANGUAGES, + main_score="f1", + date=("2021-08-01", "2022-07-01"), + form=["written"], + domains=["Non-fiction", "Fiction"], + task_subtypes=["Emotion classification"], + license="Apache 2.0", + socioeconomic_status="mixed", + annotations_creators="human-annotated", + dialect=[], + text_creation="found", + bibtex_citation=""" + @inproceedings{cahyawijaya-etal-2023-nusawrites, + title = "NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages", + author = "Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale", + editor = "Park, Jong C. and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa", + booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)", + month = nov, + year = "2023", + address = "Nusa Dua, Bali", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2023.ijcnlp-main.60", + pages = "921--945", + } + """, + n_samples={"train": 15516, "validation": 2948, "test": 6250}, + avg_character_length={"train": 740.24, "validation": 740.66, "test": 740.71}, + ) diff --git a/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json new file mode 100644 index 0000000000..238beb21fa --- /dev/null +++ b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json @@ -0,0 +1,631 @@ +{ + "dataset_revision": "c61e8c3ee47d2dce296e9601195916b54c21d575", + "evaluation_time": 30.681774854660034, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "test": [ + { + "accuracy": 0.3214, + "f1": 0.3151323771508966, + "f1_weighted": 0.3257212379765135, + "hf_subset": "btk", + "languages": [ + "bbc-Latn" + ], + "main_score": 0.3151323771508966, + "scores_per_experiment": [ + { + "accuracy": 0.338, + "f1": 0.3236514463623961, + "f1_weighted": 0.3397102191118184 + }, + { + "accuracy": 0.29, + "f1": 0.2856258218344667, + "f1_weighted": 0.2956383928680353 + }, + { + "accuracy": 0.3, + "f1": 0.28624652661584127, + "f1_weighted": 0.2962150927946865 + }, + { + "accuracy": 0.338, + "f1": 0.33311973084244556, + "f1_weighted": 0.3430751513090468 + }, + { + "accuracy": 0.308, + "f1": 0.2973040036999982, + "f1_weighted": 0.30312791955388485 + }, + { + "accuracy": 0.302, + "f1": 
0.3010583265372783, + "f1_weighted": 0.30175753237274233 + }, + { + "accuracy": 0.318, + "f1": 0.31745771043978044, + "f1_weighted": 0.32994312289240507 + }, + { + "accuracy": 0.374, + "f1": 0.3713451815896451, + "f1_weighted": 0.3857351469335026 + }, + { + "accuracy": 0.284, + "f1": 0.27334223196627144, + "f1_weighted": 0.2866092974965804 + }, + { + "accuracy": 0.362, + "f1": 0.3621727916208433, + "f1_weighted": 0.37540050443243267 + } + ] + }, + { + "accuracy": 0.4245, + "f1": 0.39380827342537683, + "f1_weighted": 0.42087199071649967, + "hf_subset": "bew", + "languages": [ + "bew-Latn" + ], + "main_score": 0.39380827342537683, + "scores_per_experiment": [ + { + "accuracy": 0.39375, + "f1": 0.3762872388681893, + "f1_weighted": 0.4109293102948807 + }, + { + "accuracy": 0.42, + "f1": 0.387323735355736, + "f1_weighted": 0.41738570468144703 + }, + { + "accuracy": 0.455, + "f1": 0.41230570882008155, + "f1_weighted": 0.4520319864093622 + }, + { + "accuracy": 0.35125, + "f1": 0.329979208092284, + "f1_weighted": 0.35440600907433795 + }, + { + "accuracy": 0.42875, + "f1": 0.3906887129230144, + "f1_weighted": 0.42110910079651737 + }, + { + "accuracy": 0.48625, + "f1": 0.451543696765524, + "f1_weighted": 0.4781291288964929 + }, + { + "accuracy": 0.4025, + "f1": 0.3659936419296737, + "f1_weighted": 0.39636583824770427 + }, + { + "accuracy": 0.42125, + "f1": 0.40038284642720695, + "f1_weighted": 0.408484444410578 + }, + { + "accuracy": 0.43, + "f1": 0.4037500439412352, + "f1_weighted": 0.41993327815762227 + }, + { + "accuracy": 0.45625, + "f1": 0.4198279011308231, + "f1_weighted": 0.44994510619605455 + } + ] + }, + { + "accuracy": 0.458, + "f1": 0.3387445290798234, + "f1_weighted": 0.4261662839404307, + "hf_subset": "bug", + "languages": [ + "bug-Latn" + ], + "main_score": 0.3387445290798234, + "scores_per_experiment": [ + { + "accuracy": 0.4166666666666667, + "f1": 0.3095733756091684, + "f1_weighted": 0.38590243994829815 + }, + { + "accuracy": 0.44, + "f1": 0.33298078830292477, + "f1_weighted": 0.4128807512434945 + }, + { + "accuracy": 0.47333333333333333, + "f1": 0.3321163002918808, + "f1_weighted": 0.4302824452044468 + }, + { + "accuracy": 0.4866666666666667, + "f1": 0.35654576410497985, + "f1_weighted": 0.4475350570654345 + }, + { + "accuracy": 0.48, + "f1": 0.35006772312202955, + "f1_weighted": 0.44200862091903254 + }, + { + "accuracy": 0.46, + "f1": 0.35517093585649745, + "f1_weighted": 0.4263934378077041 + }, + { + "accuracy": 0.43666666666666665, + "f1": 0.32089116542532686, + "f1_weighted": 0.4111403935838718 + }, + { + "accuracy": 0.4666666666666667, + "f1": 0.35239548109818564, + "f1_weighted": 0.43843235041632106 + }, + { + "accuracy": 0.46, + "f1": 0.3396318106844423, + "f1_weighted": 0.4334851347482926 + }, + { + "accuracy": 0.46, + "f1": 0.3380719463027983, + "f1_weighted": 0.43360220846741054 + } + ] + }, + { + "accuracy": 0.401625, + "f1": 0.38582017619486003, + "f1_weighted": 0.40082432320446715, + "hf_subset": "jav", + "languages": [ + "jav-Latn" + ], + "main_score": 0.38582017619486003, + "scores_per_experiment": [ + { + "accuracy": 0.48625, + "f1": 0.4643995259982101, + "f1_weighted": 0.48093490014252255 + }, + { + "accuracy": 0.42125, + "f1": 0.40285921408733005, + "f1_weighted": 0.4247359101699232 + }, + { + "accuracy": 0.3425, + "f1": 0.3143584360259137, + "f1_weighted": 0.33864105251831567 + }, + { + "accuracy": 0.4075, + "f1": 0.3913139354164734, + "f1_weighted": 0.41011135438037505 + }, + { + "accuracy": 0.44, + "f1": 0.4291931092590885, + "f1_weighted": 0.44237522329823803 
+ }, + { + "accuracy": 0.395, + "f1": 0.37965822377670894, + "f1_weighted": 0.39966318135475176 + }, + { + "accuracy": 0.3775, + "f1": 0.3657708766399898, + "f1_weighted": 0.37462251868339635 + }, + { + "accuracy": 0.36625, + "f1": 0.3720521944066414, + "f1_weighted": 0.36486773915726567 + }, + { + "accuracy": 0.3725, + "f1": 0.36010054392217467, + "f1_weighted": 0.3712331315898564 + }, + { + "accuracy": 0.4075, + "f1": 0.3784957024160698, + "f1_weighted": 0.40105822075002684 + } + ] + }, + { + "accuracy": 0.34340000000000004, + "f1": 0.3206894431133217, + "f1_weighted": 0.353083112495017, + "hf_subset": "mad", + "languages": [ + "mad-Latn" + ], + "main_score": 0.3206894431133217, + "scores_per_experiment": [ + { + "accuracy": 0.374, + "f1": 0.3257810696811188, + "f1_weighted": 0.38977042992425187 + }, + { + "accuracy": 0.406, + "f1": 0.37611944612636755, + "f1_weighted": 0.43015617836056 + }, + { + "accuracy": 0.27, + "f1": 0.24911674248201615, + "f1_weighted": 0.28928203327555957 + }, + { + "accuracy": 0.338, + "f1": 0.3036616015325878, + "f1_weighted": 0.34408739677850614 + }, + { + "accuracy": 0.352, + "f1": 0.32826017413402436, + "f1_weighted": 0.3439541344737281 + }, + { + "accuracy": 0.318, + "f1": 0.3031098928055748, + "f1_weighted": 0.3365158771714695 + }, + { + "accuracy": 0.302, + "f1": 0.2992980431254715, + "f1_weighted": 0.30698361443755984 + }, + { + "accuracy": 0.348, + "f1": 0.3408801027297854, + "f1_weighted": 0.3473396182875309 + }, + { + "accuracy": 0.356, + "f1": 0.33487405343120197, + "f1_weighted": 0.3704957464348239 + }, + { + "accuracy": 0.37, + "f1": 0.3457933050850686, + "f1_weighted": 0.3722460958061799 + } + ] + }, + { + "accuracy": 0.3456, + "f1": 0.3332417743512957, + "f1_weighted": 0.3477617860741592, + "hf_subset": "mak", + "languages": [ + "mak-Latn" + ], + "main_score": 0.3332417743512957, + "scores_per_experiment": [ + { + "accuracy": 0.304, + "f1": 0.29277017057876975, + "f1_weighted": 0.29996589209598923 + }, + { + "accuracy": 0.364, + "f1": 0.3478672256767386, + "f1_weighted": 0.3698299266733612 + }, + { + "accuracy": 0.338, + "f1": 0.32088960137583966, + "f1_weighted": 0.3355429806951172 + }, + { + "accuracy": 0.354, + "f1": 0.34488405895649493, + "f1_weighted": 0.3567552117751508 + }, + { + "accuracy": 0.372, + "f1": 0.364900742905858, + "f1_weighted": 0.37649321641700156 + }, + { + "accuracy": 0.286, + "f1": 0.2728936842401708, + "f1_weighted": 0.276313348077729 + }, + { + "accuracy": 0.364, + "f1": 0.34360382756689495, + "f1_weighted": 0.37160242491973117 + }, + { + "accuracy": 0.362, + "f1": 0.3336563740871013, + "f1_weighted": 0.36882658763521453 + }, + { + "accuracy": 0.362, + "f1": 0.35577879466410944, + "f1_weighted": 0.36688739466662645 + }, + { + "accuracy": 0.35, + "f1": 0.35517326346097955, + "f1_weighted": 0.3554008777856709 + } + ] + }, + { + "accuracy": 0.43162500000000004, + "f1": 0.4123280226867011, + "f1_weighted": 0.43162497646235753, + "hf_subset": "min", + "languages": [ + "min-Latn" + ], + "main_score": 0.4123280226867011, + "scores_per_experiment": [ + { + "accuracy": 0.46375, + "f1": 0.44484123846691637, + "f1_weighted": 0.4712451764217766 + }, + { + "accuracy": 0.3775, + "f1": 0.3643001298686448, + "f1_weighted": 0.376813449785663 + }, + { + "accuracy": 0.41125, + "f1": 0.3829144373621864, + "f1_weighted": 0.412683307362421 + }, + { + "accuracy": 0.42375, + "f1": 0.4102418554509037, + "f1_weighted": 0.4398502957802223 + }, + { + "accuracy": 0.45625, + "f1": 0.4325055144012685, + "f1_weighted": 0.45908548656621134 + }, + { + 
"accuracy": 0.4075, + "f1": 0.40326455329642263, + "f1_weighted": 0.40400699301874793 + }, + { + "accuracy": 0.44375, + "f1": 0.4250503086927672, + "f1_weighted": 0.437795624269616 + }, + { + "accuracy": 0.4925, + "f1": 0.4644330089283074, + "f1_weighted": 0.4962912782707864 + }, + { + "accuracy": 0.39125, + "f1": 0.3731609794849108, + "f1_weighted": 0.3780168779805638 + }, + { + "accuracy": 0.44875, + "f1": 0.422568200914683, + "f1_weighted": 0.4404612751675662 + } + ] + }, + { + "accuracy": 0.52825, + "f1": 0.459663017413245, + "f1_weighted": 0.515528475271224, + "hf_subset": "mui", + "languages": [ + "mui-Latn" + ], + "main_score": 0.459663017413245, + "scores_per_experiment": [ + { + "accuracy": 0.535, + "f1": 0.45880025529607743, + "f1_weighted": 0.512789887146747 + }, + { + "accuracy": 0.515, + "f1": 0.44756324576943396, + "f1_weighted": 0.5053786563176973 + }, + { + "accuracy": 0.57, + "f1": 0.4867495605028428, + "f1_weighted": 0.5564575786827305 + }, + { + "accuracy": 0.525, + "f1": 0.4685611501096104, + "f1_weighted": 0.5102640929081251 + }, + { + "accuracy": 0.5225, + "f1": 0.46150932318837123, + "f1_weighted": 0.5189692035734373 + }, + { + "accuracy": 0.5225, + "f1": 0.45770226695605476, + "f1_weighted": 0.5140849693698617 + }, + { + "accuracy": 0.5275, + "f1": 0.4512662495557434, + "f1_weighted": 0.5187169643132077 + }, + { + "accuracy": 0.565, + "f1": 0.49051304471740115, + "f1_weighted": 0.5511062581040848 + }, + { + "accuracy": 0.5275, + "f1": 0.4569960764317393, + "f1_weighted": 0.5116364072753933 + }, + { + "accuracy": 0.4725, + "f1": 0.416969001605175, + "f1_weighted": 0.45588073502095583 + } + ] + }, + { + "accuracy": 0.4796666666666667, + "f1": 0.44962745486366335, + "f1_weighted": 0.4709936699617824, + "hf_subset": "rej", + "languages": [ + "rej-Latn" + ], + "main_score": 0.44962745486366335, + "scores_per_experiment": [ + { + "accuracy": 0.49, + "f1": 0.45186175621323516, + "f1_weighted": 0.4808406634484221 + }, + { + "accuracy": 0.47333333333333333, + "f1": 0.4607988557270536, + "f1_weighted": 0.471745015478515 + }, + { + "accuracy": 0.49, + "f1": 0.4544473352842404, + "f1_weighted": 0.4808074365483346 + }, + { + "accuracy": 0.48333333333333334, + "f1": 0.4281558348970865, + "f1_weighted": 0.47445496870413395 + }, + { + "accuracy": 0.4766666666666667, + "f1": 0.4505383525768966, + "f1_weighted": 0.4756368480955041 + }, + { + "accuracy": 0.44666666666666666, + "f1": 0.43585601662305856, + "f1_weighted": 0.4437044537366978 + }, + { + "accuracy": 0.49333333333333335, + "f1": 0.4494438738901087, + "f1_weighted": 0.48866861186014526 + }, + { + "accuracy": 0.48, + "f1": 0.4531886933787749, + "f1_weighted": 0.4592988193843908 + }, + { + "accuracy": 0.4533333333333333, + "f1": 0.417909502775691, + "f1_weighted": 0.43155574494059723 + }, + { + "accuracy": 0.51, + "f1": 0.4940743272704882, + "f1_weighted": 0.5032241374210824 + } + ] + }, + { + "accuracy": 0.39325, + "f1": 0.3763872295097401, + "f1_weighted": 0.3832931567207166, + "hf_subset": "sun", + "languages": [ + "sun-Latn" + ], + "main_score": 0.3763872295097401, + "scores_per_experiment": [ + { + "accuracy": 0.38625, + "f1": 0.3805730407763334, + "f1_weighted": 0.3764588030293387 + }, + { + "accuracy": 0.36375, + "f1": 0.3650317461156343, + "f1_weighted": 0.34099019135584024 + }, + { + "accuracy": 0.425, + "f1": 0.39959939304184167, + "f1_weighted": 0.4230908145364338 + }, + { + "accuracy": 0.395, + "f1": 0.373317400296529, + "f1_weighted": 0.39337329478943567 + }, + { + "accuracy": 0.35875, + "f1": 0.34543384961840706, 
+ "f1_weighted": 0.35692749381907346 + }, + { + "accuracy": 0.3475, + "f1": 0.3263024700923112, + "f1_weighted": 0.3391100623478559 + }, + { + "accuracy": 0.43625, + "f1": 0.41907354958492377, + "f1_weighted": 0.4305452819434848 + }, + { + "accuracy": 0.44, + "f1": 0.4110033262988062, + "f1_weighted": 0.426467703341569 + }, + { + "accuracy": 0.4275, + "f1": 0.39967878274506885, + "f1_weighted": 0.4154273254915345 + }, + { + "accuracy": 0.3525, + "f1": 0.34385873652754534, + "f1_weighted": 0.3305405965526003 + } + ] + } + ] + }, + "task_name": "NusaParagraphEmotionClassification" +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json new file mode 100644 index 0000000000..60065a8372 --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json @@ -0,0 +1,631 @@ +{ + "dataset_revision": "c61e8c3ee47d2dce296e9601195916b54c21d575", + "evaluation_time": 24.23644709587097, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "test": [ + { + "accuracy": 0.277, + "f1": 0.27126165024982085, + "f1_weighted": 0.2834478556106432, + "hf_subset": "btk", + "languages": [ + "bbc-Latn" + ], + "main_score": 0.27126165024982085, + "scores_per_experiment": [ + { + "accuracy": 0.314, + "f1": 0.29369774223453093, + "f1_weighted": 0.3141896438125419 + }, + { + "accuracy": 0.24, + "f1": 0.23040100285568038, + "f1_weighted": 0.2479308563492741 + }, + { + "accuracy": 0.27, + "f1": 0.2692443860511899, + "f1_weighted": 0.2807129856147608 + }, + { + "accuracy": 0.294, + "f1": 0.2859904308906798, + "f1_weighted": 0.29682513237245234 + }, + { + "accuracy": 0.264, + "f1": 0.26069297096200544, + "f1_weighted": 0.2666833431167352 + }, + { + "accuracy": 0.28, + "f1": 0.2763027716944718, + "f1_weighted": 0.28413856051432484 + }, + { + "accuracy": 0.29, + "f1": 0.28590985886058995, + "f1_weighted": 0.2957615157478846 + }, + { + "accuracy": 0.27, + "f1": 0.2764076446519172, + "f1_weighted": 0.2812094799510077 + }, + { + "accuracy": 0.272, + "f1": 0.25965981472105015, + "f1_weighted": 0.2767962794823585 + }, + { + "accuracy": 0.276, + "f1": 0.2743098795760927, + "f1_weighted": 0.290230759145092 + } + ] + }, + { + "accuracy": 0.35487500000000005, + "f1": 0.33697352910373235, + "f1_weighted": 0.3601622467783322, + "hf_subset": "bew", + "languages": [ + "bew-Latn" + ], + "main_score": 0.33697352910373235, + "scores_per_experiment": [ + { + "accuracy": 0.37125, + "f1": 0.3540077335153401, + "f1_weighted": 0.39137483966920933 + }, + { + "accuracy": 0.32875, + "f1": 0.3160142526344732, + "f1_weighted": 0.3259161815852832 + }, + { + "accuracy": 0.34, + "f1": 0.31817166994834045, + "f1_weighted": 0.3542369525513665 + }, + { + "accuracy": 0.29875, + "f1": 0.292564916118992, + "f1_weighted": 0.30966649088408155 + }, + { + "accuracy": 0.35, + "f1": 0.32293996085209403, + "f1_weighted": 0.35243611573882055 + }, + { + "accuracy": 0.43, + "f1": 0.40910086719682504, + "f1_weighted": 0.4334252155593172 + }, + { + "accuracy": 0.3475, + "f1": 0.31521303287394264, + "f1_weighted": 0.34742462579045 + }, + { + "accuracy": 0.34875, + "f1": 0.3517307789872814, + "f1_weighted": 0.3469190752215223 + }, + { + "accuracy": 0.34375, + "f1": 
0.32420658665204655, + "f1_weighted": 0.34431894374713173 + }, + { + "accuracy": 0.39, + "f1": 0.3657854922579878, + "f1_weighted": 0.3959040270361394 + } + ] + }, + { + "accuracy": 0.32133333333333336, + "f1": 0.2664925322431652, + "f1_weighted": 0.3175271605763864, + "hf_subset": "bug", + "languages": [ + "bug-Latn" + ], + "main_score": 0.2664925322431652, + "scores_per_experiment": [ + { + "accuracy": 0.33, + "f1": 0.2690694242677827, + "f1_weighted": 0.32893867770269963 + }, + { + "accuracy": 0.29, + "f1": 0.23934870533781605, + "f1_weighted": 0.2872312129626104 + }, + { + "accuracy": 0.34, + "f1": 0.27303276055894815, + "f1_weighted": 0.3342038219082274 + }, + { + "accuracy": 0.3566666666666667, + "f1": 0.2874656532634115, + "f1_weighted": 0.34913223877981514 + }, + { + "accuracy": 0.30333333333333334, + "f1": 0.2559630348617153, + "f1_weighted": 0.30848225085615355 + }, + { + "accuracy": 0.34, + "f1": 0.28628676608727677, + "f1_weighted": 0.3352255880289301 + }, + { + "accuracy": 0.2866666666666667, + "f1": 0.24670223565283042, + "f1_weighted": 0.2808510152948773 + }, + { + "accuracy": 0.2833333333333333, + "f1": 0.24865305923374081, + "f1_weighted": 0.27647516313911474 + }, + { + "accuracy": 0.32666666666666666, + "f1": 0.27056642764881217, + "f1_weighted": 0.3258835232970632 + }, + { + "accuracy": 0.3566666666666667, + "f1": 0.28783725551931855, + "f1_weighted": 0.34884811379437236 + } + ] + }, + { + "accuracy": 0.22849999999999998, + "f1": 0.2238462715459189, + "f1_weighted": 0.23244042135604306, + "hf_subset": "jav", + "languages": [ + "jav-Latn" + ], + "main_score": 0.2238462715459189, + "scores_per_experiment": [ + { + "accuracy": 0.22625, + "f1": 0.22477912752698653, + "f1_weighted": 0.2268541165610537 + }, + { + "accuracy": 0.2175, + "f1": 0.2186960101249864, + "f1_weighted": 0.22566634543544642 + }, + { + "accuracy": 0.1975, + "f1": 0.1879092099035058, + "f1_weighted": 0.20343751167561647 + }, + { + "accuracy": 0.26625, + "f1": 0.26419870970111176, + "f1_weighted": 0.2717422574236455 + }, + { + "accuracy": 0.215, + "f1": 0.20979841378466718, + "f1_weighted": 0.2062623277473962 + }, + { + "accuracy": 0.24125, + "f1": 0.2326430941053767, + "f1_weighted": 0.24863365850820407 + }, + { + "accuracy": 0.21875, + "f1": 0.2197192231925285, + "f1_weighted": 0.21640056865489501 + }, + { + "accuracy": 0.2225, + "f1": 0.21311042980527248, + "f1_weighted": 0.22436332722932423 + }, + { + "accuracy": 0.2275, + "f1": 0.22091719690039796, + "f1_weighted": 0.24580931109901363 + }, + { + "accuracy": 0.2525, + "f1": 0.2466913004143557, + "f1_weighted": 0.2552347892258354 + } + ] + }, + { + "accuracy": 0.21980000000000005, + "f1": 0.21192658413285695, + "f1_weighted": 0.22727227650014284, + "hf_subset": "mad", + "languages": [ + "mad-Latn" + ], + "main_score": 0.21192658413285695, + "scores_per_experiment": [ + { + "accuracy": 0.208, + "f1": 0.18511888176376187, + "f1_weighted": 0.21332821233845325 + }, + { + "accuracy": 0.224, + "f1": 0.2127281677917643, + "f1_weighted": 0.2346192022511952 + }, + { + "accuracy": 0.194, + "f1": 0.17373676326757032, + "f1_weighted": 0.2088309335083741 + }, + { + "accuracy": 0.23, + "f1": 0.23155689321755518, + "f1_weighted": 0.23894340228538488 + }, + { + "accuracy": 0.218, + "f1": 0.23053279263131268, + "f1_weighted": 0.22273361807837916 + }, + { + "accuracy": 0.204, + "f1": 0.19761834854393823, + "f1_weighted": 0.224314713296677 + }, + { + "accuracy": 0.208, + "f1": 0.20984782820267348, + "f1_weighted": 0.20950001880163593 + }, + { + "accuracy": 0.21, + "f1": 
0.2002282237144761, + "f1_weighted": 0.2029563737538714 + }, + { + "accuracy": 0.25, + "f1": 0.24298470899655614, + "f1_weighted": 0.2584185609503066 + }, + { + "accuracy": 0.252, + "f1": 0.23491323319896099, + "f1_weighted": 0.2590777297371507 + } + ] + }, + { + "accuracy": 0.2638, + "f1": 0.2578358903262782, + "f1_weighted": 0.2661928065319922, + "hf_subset": "mak", + "languages": [ + "mak-Latn" + ], + "main_score": 0.2578358903262782, + "scores_per_experiment": [ + { + "accuracy": 0.248, + "f1": 0.24069288406861938, + "f1_weighted": 0.2447664507915594 + }, + { + "accuracy": 0.284, + "f1": 0.27058688367620015, + "f1_weighted": 0.27673645243283373 + }, + { + "accuracy": 0.272, + "f1": 0.2741225301737858, + "f1_weighted": 0.276621274436082 + }, + { + "accuracy": 0.314, + "f1": 0.30740488258621973, + "f1_weighted": 0.321737645849929 + }, + { + "accuracy": 0.252, + "f1": 0.25614318446498197, + "f1_weighted": 0.24971459967780393 + }, + { + "accuracy": 0.246, + "f1": 0.24137243318728094, + "f1_weighted": 0.25596496053060297 + }, + { + "accuracy": 0.266, + "f1": 0.2626051752995075, + "f1_weighted": 0.26861546664457053 + }, + { + "accuracy": 0.238, + "f1": 0.22604490798811452, + "f1_weighted": 0.2496284185403417 + }, + { + "accuracy": 0.25, + "f1": 0.24253494248571034, + "f1_weighted": 0.25078819155699467 + }, + { + "accuracy": 0.268, + "f1": 0.2568510793323614, + "f1_weighted": 0.26735460485920354 + } + ] + }, + { + "accuracy": 0.331625, + "f1": 0.31203752528160794, + "f1_weighted": 0.3375791706110565, + "hf_subset": "min", + "languages": [ + "min-Latn" + ], + "main_score": 0.31203752528160794, + "scores_per_experiment": [ + { + "accuracy": 0.33375, + "f1": 0.32358589078715616, + "f1_weighted": 0.34746891132693414 + }, + { + "accuracy": 0.29875, + "f1": 0.28355913142117073, + "f1_weighted": 0.2909612089632645 + }, + { + "accuracy": 0.32375, + "f1": 0.3071041577833395, + "f1_weighted": 0.3292705758622453 + }, + { + "accuracy": 0.3225, + "f1": 0.2931790543044436, + "f1_weighted": 0.33925343908735384 + }, + { + "accuracy": 0.33125, + "f1": 0.31628774219425376, + "f1_weighted": 0.3424427529903656 + }, + { + "accuracy": 0.29375, + "f1": 0.2818082503741261, + "f1_weighted": 0.2953881319707134 + }, + { + "accuracy": 0.31, + "f1": 0.3003831770748599, + "f1_weighted": 0.3072058319425441 + }, + { + "accuracy": 0.3725, + "f1": 0.33535589304540814, + "f1_weighted": 0.3807685347860339 + }, + { + "accuracy": 0.34125, + "f1": 0.3231075938748288, + "f1_weighted": 0.3555907475538145 + }, + { + "accuracy": 0.38875, + "f1": 0.3560043619564926, + "f1_weighted": 0.38744157162729587 + } + ] + }, + { + "accuracy": 0.4692499999999999, + "f1": 0.4303684046398549, + "f1_weighted": 0.4700613369853478, + "hf_subset": "mui", + "languages": [ + "mui-Latn" + ], + "main_score": 0.4303684046398549, + "scores_per_experiment": [ + { + "accuracy": 0.515, + "f1": 0.4563762789603358, + "f1_weighted": 0.5130766633725024 + }, + { + "accuracy": 0.5025, + "f1": 0.45901235586400585, + "f1_weighted": 0.5058057433498943 + }, + { + "accuracy": 0.4925, + "f1": 0.45595602425671666, + "f1_weighted": 0.49693218586812554 + }, + { + "accuracy": 0.48, + "f1": 0.4415106364975855, + "f1_weighted": 0.48154788019378564 + }, + { + "accuracy": 0.455, + "f1": 0.41773898927582725, + "f1_weighted": 0.45784476720360373 + }, + { + "accuracy": 0.4375, + "f1": 0.40889437612984875, + "f1_weighted": 0.44016082019818187 + }, + { + "accuracy": 0.4775, + "f1": 0.4290062891935917, + "f1_weighted": 0.47653765372639895 + }, + { + "accuracy": 0.48, + "f1": 
0.4369830693147803, + "f1_weighted": 0.4814515517428867 + }, + { + "accuracy": 0.435, + "f1": 0.39401451572891144, + "f1_weighted": 0.435000304604335 + }, + { + "accuracy": 0.4175, + "f1": 0.4041915111769459, + "f1_weighted": 0.41225579959376396 + } + ] + }, + { + "accuracy": 0.29733333333333334, + "f1": 0.2762887415609376, + "f1_weighted": 0.2978179318127897, + "hf_subset": "rej", + "languages": [ + "rej-Latn" + ], + "main_score": 0.2762887415609376, + "scores_per_experiment": [ + { + "accuracy": 0.2833333333333333, + "f1": 0.25604603411886484, + "f1_weighted": 0.29245780306351704 + }, + { + "accuracy": 0.30333333333333334, + "f1": 0.28113053427750806, + "f1_weighted": 0.3010135394805909 + }, + { + "accuracy": 0.32, + "f1": 0.2971136531022927, + "f1_weighted": 0.3262235990075212 + }, + { + "accuracy": 0.2966666666666667, + "f1": 0.2727037307899841, + "f1_weighted": 0.29903655444221483 + }, + { + "accuracy": 0.28, + "f1": 0.2645968765718886, + "f1_weighted": 0.288615503042191 + }, + { + "accuracy": 0.26666666666666666, + "f1": 0.24737017825172716, + "f1_weighted": 0.2570440434967498 + }, + { + "accuracy": 0.31, + "f1": 0.2943694084415235, + "f1_weighted": 0.32064680156428677 + }, + { + "accuracy": 0.30333333333333334, + "f1": 0.2906138666667327, + "f1_weighted": 0.30399507201423165 + }, + { + "accuracy": 0.26666666666666666, + "f1": 0.2434507358645241, + "f1_weighted": 0.2570368114424851 + }, + { + "accuracy": 0.3433333333333333, + "f1": 0.3154923975243301, + "f1_weighted": 0.3321095905741091 + } + ] + }, + { + "accuracy": 0.22737500000000002, + "f1": 0.22201108018236107, + "f1_weighted": 0.2268334566577134, + "hf_subset": "sun", + "languages": [ + "sun-Latn" + ], + "main_score": 0.22201108018236107, + "scores_per_experiment": [ + { + "accuracy": 0.26375, + "f1": 0.26468537010811055, + "f1_weighted": 0.27241507710618595 + }, + { + "accuracy": 0.215, + "f1": 0.21243401329304484, + "f1_weighted": 0.21401278307641203 + }, + { + "accuracy": 0.25, + "f1": 0.24041892287793698, + "f1_weighted": 0.24992888730438692 + }, + { + "accuracy": 0.24, + "f1": 0.2437984335090524, + "f1_weighted": 0.2470727529412417 + }, + { + "accuracy": 0.1975, + "f1": 0.1916703232959061, + "f1_weighted": 0.19990974511107013 + }, + { + "accuracy": 0.2225, + "f1": 0.20044414068164243, + "f1_weighted": 0.22217627866958062 + }, + { + "accuracy": 0.21875, + "f1": 0.2233498238865721, + "f1_weighted": 0.22554046966128205 + }, + { + "accuracy": 0.22875, + "f1": 0.22585718909251032, + "f1_weighted": 0.22904241320467095 + }, + { + "accuracy": 0.21625, + "f1": 0.19565547010330434, + "f1_weighted": 0.19907489223174352 + }, + { + "accuracy": 0.22125, + "f1": 0.22179711497553087, + "f1_weighted": 0.20916126727055986 + } + ] + } + ] + }, + "task_name": "NusaParagraphEmotionClassification" +} \ No newline at end of file From 19cc06ffbea0d732e936ff4431d66fd6004efbb4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 15 Jun 2024 20:59:43 +0000 Subject: [PATCH 7/9] Update points table --- docs/mmteb/points_table.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/mmteb/points_table.md b/docs/mmteb/points_table.md index 1a4a1c12eb..4edfaf15d6 100644 --- a/docs/mmteb/points_table.md +++ b/docs/mmteb/points_table.md @@ -4,7 +4,7 @@ _Note_: this table is **autogenerated** and should not be edited. 
It is intended | GitHub | New dataset | Review PR | Coordination | Bug fixes | Dataset annotations | Running Models | Paper writing | New task | Total | |:------------------|--------------:|------------:|---------------:|------------:|----------------------:|-----------------:|----------------:|-----------:|--------:| -| KennethEnevoldsen | 68 | 260 | 11 | 81 | 35 | 0 | 0 | 0 | 455 | +| KennethEnevoldsen | 68 | 262 | 11 | 81 | 35 | 0 | 0 | 0 | 457 | | isaac-chung | 116 | 182 | 4 | 40 | 1 | 0 | 4 | 0 | 347 | | awinml | 292 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 294 | | imenelydiaker | 120 | 140 | 0 | 20 | 0 | 0 | 0 | 0 | 280 | @@ -14,7 +14,7 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended | wissam-sib | 134 | 6 | 0 | 4 | 0 | 0 | 0 | 0 | 144 | | jupyterjazz | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 108 | | SaitejaUtpala | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 102 | -| gentaiscool | 100 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 100 | +| gentaiscool | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 102 | | dokato | 82 | 4 | 0 | 8 | 0 | 0 | 0 | 0 | 94 | | schmarion | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | | MathieuCiancone | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | From bf7fc5b72ef1d09fa5dcd5445bbc617e13e04e13 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 15 Jun 2024 21:03:02 +0000 Subject: [PATCH 8/9] Update tasks table --- docs/tasks.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/tasks.md b/docs/tasks.md index b68239bf5e..833be841d3 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -337,6 +337,7 @@ The following tables give you an overview of the tasks in MTEB. | [NordicLangClassification](https://aclanthology.org/2021.vardial-1.8/) | ['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] | Classification | s2s | | {'test': 3000} | {'test': 78.2} | | [NorwegianCourtsBitextMining](https://opus.nlpl.eu/index.php) (Tiedemann et al., 2020) | ['nno', 'nob'] | BitextMining | s2s | [Legal] | {'test': 2050} | {'test': 1884.0} | | [NorwegianParliamentClassification](https://huggingface.co/datasets/NbAiLab/norwegian_parliament) | ['nob'] | Classification | s2s | | {'test': 1200, 'validation': 1200} | {'test': 1884.0, 'validation': 1911.0} | +| [NusaParagraphEmotionClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | Classification | s2s | [Non-fiction, Fiction] | {'train': 15516, 'validation': 2948, 'test': 6250} | {'train': 740.24, 'validation': 740.66, 'test': 740.71} | | [NusaParagraphTopicClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | Classification | s2s | [Non-fiction, Fiction] | {'train': 15516, 'validation': 2948, 'test': 6250} | {'train': 740.24, 'validation': 740.66, 'test': 740.71} | | [NusaTranslationBitextMining](https://huggingface.co/datasets/indonlp/nusatranslation_mt) (Cahyawijaya et al., 2023) | ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | BitextMining | s2s | [Social] | {'train': 50200} | {'train': 147.01} | | [NusaX-senti](https://arxiv.org/abs/2205.15960) (Winata et al., 2022) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | Classification | s2s | [Reviews, Web, Social, Constructed] | {'test': 4800} | {'test': 52.4} | @@ -644,7 +645,7 @@ The following tables give you an overview of the tasks in MTEB. 
| bao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bba | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbc | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| bbc | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -657,7 +658,7 @@ The following tables give you an overview of the tasks in MTEB. | beo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | ber | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | beu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bew | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| bew | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bgc | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bgs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bgt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -705,7 +706,7 @@ The following tables give you an overview of the tasks in MTEB. | bsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bsp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bug | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| bug | 2 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | buk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | bul | 3 | 5 | 0 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | | bus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -945,7 +946,7 @@ The following tables give you an overview of the tasks in MTEB. | jac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jae | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jav | 4 | 7 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | +| jav | 4 | 8 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | jic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | jiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1070,11 +1071,11 @@ The following tables give you an overview of the tasks in MTEB. | lvs | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | lww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | maa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mad | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| mad | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mag | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mai | 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | maj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mak | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| mak | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mal | 7 | 8 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | | mam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | maq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1117,7 +1118,7 @@ The following tables give you an overview of the tasks in MTEB. | mig | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mih | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mil | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| min | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| min | 3 | 5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1159,7 +1160,7 @@ The following tables give you an overview of the tasks in MTEB. | msy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mti | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mui | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| mui | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mup | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | mux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | muy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1316,7 +1317,7 @@ The following tables give you an overview of the tasks in MTEB. 
| rai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | raj | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | reg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rej | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| rej | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | rgu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | rkb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | rmc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1389,7 +1390,7 @@ The following tables give you an overview of the tasks in MTEB. | stp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | sua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | sue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sun | 3 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | +| sun | 3 | 5 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | sus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | suz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | svk | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -1595,7 +1596,7 @@ The following tables give you an overview of the tasks in MTEB. | zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | zul | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | | zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| Total | 1390 | 980 | 107 | 3 | 28 | 67 | 46 | 335 | 85 | 2 | +| Total | 1390 | 990 | 107 | 3 | 28 | 67 | 46 | 335 | 85 | 2 | From 443fe03654a8e1864982712d256c4b3b990bd369 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 15 Jun 2024 21:07:50 +0000 Subject: [PATCH 9/9] 1.12.33 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fb39b81cad..6a6c864508 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.12.32" +version = "1.12.33" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [