From 5e4ad442535a20719f42457ca034269df3de3646 Mon Sep 17 00:00:00 2001 From: Genta Indra Winata Date: Sat, 15 Jun 2024 16:58:46 -0400 Subject: [PATCH] fix: Add NusaParagraph Emotion Classification (#928) * add NusaTranslationEmotionClassification * update name * add new task * add new task * Update mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py Co-authored-by: Kenneth Enevoldsen * add sizes * add point * add results * update desc --------- Co-authored-by: Kenneth Enevoldsen --- docs/mmteb/points/928.jsonl | 2 + mteb/abstasks/TaskMetadata.py | 1 + mteb/tasks/Classification/__init__.py | 1 + .../NusaParagraphEmotionClassification.py | 60 ++ .../NusaParagraphEmotionClassification.json | 631 ++++++++++++++++++ .../NusaParagraphEmotionClassification.json | 631 ++++++++++++++++++ 6 files changed, 1326 insertions(+) create mode 100644 docs/mmteb/points/928.jsonl create mode 100644 mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py create mode 100644 results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json create mode 100644 results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json diff --git a/docs/mmteb/points/928.jsonl b/docs/mmteb/points/928.jsonl new file mode 100644 index 0000000000..5c807f432f --- /dev/null +++ b/docs/mmteb/points/928.jsonl @@ -0,0 +1,2 @@ +{"GitHub": "gentaiscool", "New dataset": 2} +{"GitHub": "KennethEnevoldsen", "Review PR": 2} \ No newline at end of file diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 2784491b7a..c81f302163 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -35,6 +35,7 @@ "Cross-Lingual Semantic Discrimination", "Textual Entailment", "Counterfactual Detection", + "Emotion classification", ] TASK_DOMAIN = Literal[ diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py index 9559feeb80..529ce5cba6 100644 --- a/mteb/tasks/Classification/__init__.py +++ b/mteb/tasks/Classification/__init__.py @@ -86,6 +86,7 @@ from .multilingual.MultilingualSentimentClassification import * from .multilingual.NaijaSenti import * from .multilingual.NordicLangClassification import * +from .multilingual.NusaParagraphEmotionClassification import * from .multilingual.NusaParagraphTopicClassification import * from .multilingual.NusaXSenti import * from .multilingual.ScalaClassification import * diff --git a/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py new file mode 100644 index 0000000000..1a565764ef --- /dev/null +++ b/mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks import AbsTaskClassification, MultilingualTask + +_LANGUAGES = { + "btk": ["bbc-Latn"], + "bew": ["bew-Latn"], + "bug": ["bug-Latn"], + "jav": ["jav-Latn"], + "mad": ["mad-Latn"], + "mak": ["mak-Latn"], + "min": ["min-Latn"], + "mui": ["mui-Latn"], + "rej": ["rej-Latn"], + "sun": ["sun-Latn"], +} + + +class NusaParagraphEmotionClassification(MultilingualTask, AbsTaskClassification): + metadata = TaskMetadata( + name="NusaParagraphEmotionClassification", + dataset={ + "path": "gentaiscool/nusaparagraph_emot", + "revision": "c61e8c3ee47d2dce296e9601195916b54c21d575", + }, + description="NusaParagraphEmotionClassification is a multi-class emotion classification on 10 Indonesian languages from the NusaParagraph dataset.", + reference="https://github.com/IndoNLP/nusa-writes", + category="s2s", + type="Classification", + eval_splits=["test"], + eval_langs=_LANGUAGES, + main_score="f1", + date=("2021-08-01", "2022-07-01"), + form=["written"], + domains=["Non-fiction", "Fiction"], + task_subtypes=["Emotion classification"], + license="Apache 2.0", + socioeconomic_status="mixed", + annotations_creators="human-annotated", + dialect=[], + text_creation="found", + bibtex_citation=""" + @inproceedings{cahyawijaya-etal-2023-nusawrites, + title = "NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages", + author = "Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale", + editor = "Park, Jong C. and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa", + booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)", + month = nov, + year = "2023", + address = "Nusa Dua, Bali", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2023.ijcnlp-main.60", + pages = "921--945", + } + """, + n_samples={"train": 15516, "validation": 2948, "test": 6250}, + avg_character_length={"train": 740.24, "validation": 740.66, "test": 740.71}, + ) diff --git a/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json new file mode 100644 index 0000000000..238beb21fa --- /dev/null +++ b/results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json @@ -0,0 +1,631 @@ +{ + "dataset_revision": "c61e8c3ee47d2dce296e9601195916b54c21d575", + "evaluation_time": 30.681774854660034, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "test": [ + { + "accuracy": 0.3214, + "f1": 0.3151323771508966, + "f1_weighted": 0.3257212379765135, + "hf_subset": "btk", + "languages": [ + "bbc-Latn" + ], + "main_score": 0.3151323771508966, + "scores_per_experiment": [ + { + "accuracy": 0.338, + "f1": 0.3236514463623961, + "f1_weighted": 0.3397102191118184 + }, + { + "accuracy": 0.29, + "f1": 0.2856258218344667, + "f1_weighted": 0.2956383928680353 + }, + { + "accuracy": 0.3, + "f1": 0.28624652661584127, + "f1_weighted": 0.2962150927946865 + }, + { + "accuracy": 0.338, + "f1": 0.33311973084244556, + "f1_weighted": 0.3430751513090468 + }, + { + "accuracy": 0.308, + "f1": 0.2973040036999982, + "f1_weighted": 0.30312791955388485 + }, + { + "accuracy": 0.302, + "f1": 0.3010583265372783, + "f1_weighted": 0.30175753237274233 + }, + { + "accuracy": 0.318, + "f1": 0.31745771043978044, + "f1_weighted": 0.32994312289240507 + }, + { + "accuracy": 0.374, + "f1": 0.3713451815896451, + "f1_weighted": 0.3857351469335026 + }, + { + "accuracy": 0.284, + "f1": 0.27334223196627144, + "f1_weighted": 0.2866092974965804 + }, + { + "accuracy": 0.362, + "f1": 0.3621727916208433, + "f1_weighted": 0.37540050443243267 + } + ] + }, + { + "accuracy": 0.4245, + "f1": 0.39380827342537683, + "f1_weighted": 0.42087199071649967, + "hf_subset": "bew", + "languages": [ + "bew-Latn" + ], + "main_score": 0.39380827342537683, + "scores_per_experiment": [ + { + "accuracy": 0.39375, + "f1": 0.3762872388681893, + "f1_weighted": 0.4109293102948807 + }, + { + "accuracy": 0.42, + "f1": 0.387323735355736, + "f1_weighted": 0.41738570468144703 + }, + { + "accuracy": 0.455, + "f1": 0.41230570882008155, + "f1_weighted": 0.4520319864093622 + }, + { + "accuracy": 0.35125, + "f1": 0.329979208092284, + "f1_weighted": 0.35440600907433795 + }, + { + "accuracy": 0.42875, + "f1": 0.3906887129230144, + "f1_weighted": 0.42110910079651737 + }, + { + "accuracy": 0.48625, + "f1": 0.451543696765524, + "f1_weighted": 0.4781291288964929 + }, + { + "accuracy": 0.4025, + "f1": 0.3659936419296737, + "f1_weighted": 0.39636583824770427 + }, + { + "accuracy": 0.42125, + "f1": 0.40038284642720695, + "f1_weighted": 0.408484444410578 + }, + { + "accuracy": 0.43, + "f1": 0.4037500439412352, + "f1_weighted": 0.41993327815762227 + }, + { + "accuracy": 0.45625, + "f1": 0.4198279011308231, + "f1_weighted": 0.44994510619605455 + } + ] + }, + { + "accuracy": 0.458, + "f1": 0.3387445290798234, + "f1_weighted": 0.4261662839404307, + "hf_subset": "bug", + "languages": [ + "bug-Latn" + ], + "main_score": 0.3387445290798234, + "scores_per_experiment": [ + { + "accuracy": 0.4166666666666667, + "f1": 0.3095733756091684, + "f1_weighted": 0.38590243994829815 + }, + { + "accuracy": 0.44, + "f1": 0.33298078830292477, + "f1_weighted": 0.4128807512434945 + }, + { + "accuracy": 0.47333333333333333, + "f1": 0.3321163002918808, + "f1_weighted": 0.4302824452044468 + }, + { + "accuracy": 0.4866666666666667, + "f1": 0.35654576410497985, + "f1_weighted": 0.4475350570654345 + }, + { + "accuracy": 0.48, + "f1": 0.35006772312202955, + "f1_weighted": 0.44200862091903254 + }, + { + "accuracy": 0.46, + "f1": 0.35517093585649745, + "f1_weighted": 0.4263934378077041 + }, + { + "accuracy": 0.43666666666666665, + "f1": 0.32089116542532686, + "f1_weighted": 0.4111403935838718 + }, + { + "accuracy": 0.4666666666666667, + "f1": 0.35239548109818564, + "f1_weighted": 0.43843235041632106 + }, + { + "accuracy": 0.46, + "f1": 0.3396318106844423, + "f1_weighted": 0.4334851347482926 + }, + { + "accuracy": 0.46, + "f1": 0.3380719463027983, + "f1_weighted": 0.43360220846741054 + } + ] + }, + { + "accuracy": 0.401625, + "f1": 0.38582017619486003, + "f1_weighted": 0.40082432320446715, + "hf_subset": "jav", + "languages": [ + "jav-Latn" + ], + "main_score": 0.38582017619486003, + "scores_per_experiment": [ + { + "accuracy": 0.48625, + "f1": 0.4643995259982101, + "f1_weighted": 0.48093490014252255 + }, + { + "accuracy": 0.42125, + "f1": 0.40285921408733005, + "f1_weighted": 0.4247359101699232 + }, + { + "accuracy": 0.3425, + "f1": 0.3143584360259137, + "f1_weighted": 0.33864105251831567 + }, + { + "accuracy": 0.4075, + "f1": 0.3913139354164734, + "f1_weighted": 0.41011135438037505 + }, + { + "accuracy": 0.44, + "f1": 0.4291931092590885, + "f1_weighted": 0.44237522329823803 + }, + { + "accuracy": 0.395, + "f1": 0.37965822377670894, + "f1_weighted": 0.39966318135475176 + }, + { + "accuracy": 0.3775, + "f1": 0.3657708766399898, + "f1_weighted": 0.37462251868339635 + }, + { + "accuracy": 0.36625, + "f1": 0.3720521944066414, + "f1_weighted": 0.36486773915726567 + }, + { + "accuracy": 0.3725, + "f1": 0.36010054392217467, + "f1_weighted": 0.3712331315898564 + }, + { + "accuracy": 0.4075, + "f1": 0.3784957024160698, + "f1_weighted": 0.40105822075002684 + } + ] + }, + { + "accuracy": 0.34340000000000004, + "f1": 0.3206894431133217, + "f1_weighted": 0.353083112495017, + "hf_subset": "mad", + "languages": [ + "mad-Latn" + ], + "main_score": 0.3206894431133217, + "scores_per_experiment": [ + { + "accuracy": 0.374, + "f1": 0.3257810696811188, + "f1_weighted": 0.38977042992425187 + }, + { + "accuracy": 0.406, + "f1": 0.37611944612636755, + "f1_weighted": 0.43015617836056 + }, + { + "accuracy": 0.27, + "f1": 0.24911674248201615, + "f1_weighted": 0.28928203327555957 + }, + { + "accuracy": 0.338, + "f1": 0.3036616015325878, + "f1_weighted": 0.34408739677850614 + }, + { + "accuracy": 0.352, + "f1": 0.32826017413402436, + "f1_weighted": 0.3439541344737281 + }, + { + "accuracy": 0.318, + "f1": 0.3031098928055748, + "f1_weighted": 0.3365158771714695 + }, + { + "accuracy": 0.302, + "f1": 0.2992980431254715, + "f1_weighted": 0.30698361443755984 + }, + { + "accuracy": 0.348, + "f1": 0.3408801027297854, + "f1_weighted": 0.3473396182875309 + }, + { + "accuracy": 0.356, + "f1": 0.33487405343120197, + "f1_weighted": 0.3704957464348239 + }, + { + "accuracy": 0.37, + "f1": 0.3457933050850686, + "f1_weighted": 0.3722460958061799 + } + ] + }, + { + "accuracy": 0.3456, + "f1": 0.3332417743512957, + "f1_weighted": 0.3477617860741592, + "hf_subset": "mak", + "languages": [ + "mak-Latn" + ], + "main_score": 0.3332417743512957, + "scores_per_experiment": [ + { + "accuracy": 0.304, + "f1": 0.29277017057876975, + "f1_weighted": 0.29996589209598923 + }, + { + "accuracy": 0.364, + "f1": 0.3478672256767386, + "f1_weighted": 0.3698299266733612 + }, + { + "accuracy": 0.338, + "f1": 0.32088960137583966, + "f1_weighted": 0.3355429806951172 + }, + { + "accuracy": 0.354, + "f1": 0.34488405895649493, + "f1_weighted": 0.3567552117751508 + }, + { + "accuracy": 0.372, + "f1": 0.364900742905858, + "f1_weighted": 0.37649321641700156 + }, + { + "accuracy": 0.286, + "f1": 0.2728936842401708, + "f1_weighted": 0.276313348077729 + }, + { + "accuracy": 0.364, + "f1": 0.34360382756689495, + "f1_weighted": 0.37160242491973117 + }, + { + "accuracy": 0.362, + "f1": 0.3336563740871013, + "f1_weighted": 0.36882658763521453 + }, + { + "accuracy": 0.362, + "f1": 0.35577879466410944, + "f1_weighted": 0.36688739466662645 + }, + { + "accuracy": 0.35, + "f1": 0.35517326346097955, + "f1_weighted": 0.3554008777856709 + } + ] + }, + { + "accuracy": 0.43162500000000004, + "f1": 0.4123280226867011, + "f1_weighted": 0.43162497646235753, + "hf_subset": "min", + "languages": [ + "min-Latn" + ], + "main_score": 0.4123280226867011, + "scores_per_experiment": [ + { + "accuracy": 0.46375, + "f1": 0.44484123846691637, + "f1_weighted": 0.4712451764217766 + }, + { + "accuracy": 0.3775, + "f1": 0.3643001298686448, + "f1_weighted": 0.376813449785663 + }, + { + "accuracy": 0.41125, + "f1": 0.3829144373621864, + "f1_weighted": 0.412683307362421 + }, + { + "accuracy": 0.42375, + "f1": 0.4102418554509037, + "f1_weighted": 0.4398502957802223 + }, + { + "accuracy": 0.45625, + "f1": 0.4325055144012685, + "f1_weighted": 0.45908548656621134 + }, + { + "accuracy": 0.4075, + "f1": 0.40326455329642263, + "f1_weighted": 0.40400699301874793 + }, + { + "accuracy": 0.44375, + "f1": 0.4250503086927672, + "f1_weighted": 0.437795624269616 + }, + { + "accuracy": 0.4925, + "f1": 0.4644330089283074, + "f1_weighted": 0.4962912782707864 + }, + { + "accuracy": 0.39125, + "f1": 0.3731609794849108, + "f1_weighted": 0.3780168779805638 + }, + { + "accuracy": 0.44875, + "f1": 0.422568200914683, + "f1_weighted": 0.4404612751675662 + } + ] + }, + { + "accuracy": 0.52825, + "f1": 0.459663017413245, + "f1_weighted": 0.515528475271224, + "hf_subset": "mui", + "languages": [ + "mui-Latn" + ], + "main_score": 0.459663017413245, + "scores_per_experiment": [ + { + "accuracy": 0.535, + "f1": 0.45880025529607743, + "f1_weighted": 0.512789887146747 + }, + { + "accuracy": 0.515, + "f1": 0.44756324576943396, + "f1_weighted": 0.5053786563176973 + }, + { + "accuracy": 0.57, + "f1": 0.4867495605028428, + "f1_weighted": 0.5564575786827305 + }, + { + "accuracy": 0.525, + "f1": 0.4685611501096104, + "f1_weighted": 0.5102640929081251 + }, + { + "accuracy": 0.5225, + "f1": 0.46150932318837123, + "f1_weighted": 0.5189692035734373 + }, + { + "accuracy": 0.5225, + "f1": 0.45770226695605476, + "f1_weighted": 0.5140849693698617 + }, + { + "accuracy": 0.5275, + "f1": 0.4512662495557434, + "f1_weighted": 0.5187169643132077 + }, + { + "accuracy": 0.565, + "f1": 0.49051304471740115, + "f1_weighted": 0.5511062581040848 + }, + { + "accuracy": 0.5275, + "f1": 0.4569960764317393, + "f1_weighted": 0.5116364072753933 + }, + { + "accuracy": 0.4725, + "f1": 0.416969001605175, + "f1_weighted": 0.45588073502095583 + } + ] + }, + { + "accuracy": 0.4796666666666667, + "f1": 0.44962745486366335, + "f1_weighted": 0.4709936699617824, + "hf_subset": "rej", + "languages": [ + "rej-Latn" + ], + "main_score": 0.44962745486366335, + "scores_per_experiment": [ + { + "accuracy": 0.49, + "f1": 0.45186175621323516, + "f1_weighted": 0.4808406634484221 + }, + { + "accuracy": 0.47333333333333333, + "f1": 0.4607988557270536, + "f1_weighted": 0.471745015478515 + }, + { + "accuracy": 0.49, + "f1": 0.4544473352842404, + "f1_weighted": 0.4808074365483346 + }, + { + "accuracy": 0.48333333333333334, + "f1": 0.4281558348970865, + "f1_weighted": 0.47445496870413395 + }, + { + "accuracy": 0.4766666666666667, + "f1": 0.4505383525768966, + "f1_weighted": 0.4756368480955041 + }, + { + "accuracy": 0.44666666666666666, + "f1": 0.43585601662305856, + "f1_weighted": 0.4437044537366978 + }, + { + "accuracy": 0.49333333333333335, + "f1": 0.4494438738901087, + "f1_weighted": 0.48866861186014526 + }, + { + "accuracy": 0.48, + "f1": 0.4531886933787749, + "f1_weighted": 0.4592988193843908 + }, + { + "accuracy": 0.4533333333333333, + "f1": 0.417909502775691, + "f1_weighted": 0.43155574494059723 + }, + { + "accuracy": 0.51, + "f1": 0.4940743272704882, + "f1_weighted": 0.5032241374210824 + } + ] + }, + { + "accuracy": 0.39325, + "f1": 0.3763872295097401, + "f1_weighted": 0.3832931567207166, + "hf_subset": "sun", + "languages": [ + "sun-Latn" + ], + "main_score": 0.3763872295097401, + "scores_per_experiment": [ + { + "accuracy": 0.38625, + "f1": 0.3805730407763334, + "f1_weighted": 0.3764588030293387 + }, + { + "accuracy": 0.36375, + "f1": 0.3650317461156343, + "f1_weighted": 0.34099019135584024 + }, + { + "accuracy": 0.425, + "f1": 0.39959939304184167, + "f1_weighted": 0.4230908145364338 + }, + { + "accuracy": 0.395, + "f1": 0.373317400296529, + "f1_weighted": 0.39337329478943567 + }, + { + "accuracy": 0.35875, + "f1": 0.34543384961840706, + "f1_weighted": 0.35692749381907346 + }, + { + "accuracy": 0.3475, + "f1": 0.3263024700923112, + "f1_weighted": 0.3391100623478559 + }, + { + "accuracy": 0.43625, + "f1": 0.41907354958492377, + "f1_weighted": 0.4305452819434848 + }, + { + "accuracy": 0.44, + "f1": 0.4110033262988062, + "f1_weighted": 0.426467703341569 + }, + { + "accuracy": 0.4275, + "f1": 0.39967878274506885, + "f1_weighted": 0.4154273254915345 + }, + { + "accuracy": 0.3525, + "f1": 0.34385873652754534, + "f1_weighted": 0.3305405965526003 + } + ] + } + ] + }, + "task_name": "NusaParagraphEmotionClassification" +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json new file mode 100644 index 0000000000..60065a8372 --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/bf3bf13ab40c3157080a7ab344c831b9ad18b5eb/NusaParagraphEmotionClassification.json @@ -0,0 +1,631 @@ +{ + "dataset_revision": "c61e8c3ee47d2dce296e9601195916b54c21d575", + "evaluation_time": 24.23644709587097, + "kg_co2_emissions": null, + "mteb_version": "1.12.30", + "scores": { + "test": [ + { + "accuracy": 0.277, + "f1": 0.27126165024982085, + "f1_weighted": 0.2834478556106432, + "hf_subset": "btk", + "languages": [ + "bbc-Latn" + ], + "main_score": 0.27126165024982085, + "scores_per_experiment": [ + { + "accuracy": 0.314, + "f1": 0.29369774223453093, + "f1_weighted": 0.3141896438125419 + }, + { + "accuracy": 0.24, + "f1": 0.23040100285568038, + "f1_weighted": 0.2479308563492741 + }, + { + "accuracy": 0.27, + "f1": 0.2692443860511899, + "f1_weighted": 0.2807129856147608 + }, + { + "accuracy": 0.294, + "f1": 0.2859904308906798, + "f1_weighted": 0.29682513237245234 + }, + { + "accuracy": 0.264, + "f1": 0.26069297096200544, + "f1_weighted": 0.2666833431167352 + }, + { + "accuracy": 0.28, + "f1": 0.2763027716944718, + "f1_weighted": 0.28413856051432484 + }, + { + "accuracy": 0.29, + "f1": 0.28590985886058995, + "f1_weighted": 0.2957615157478846 + }, + { + "accuracy": 0.27, + "f1": 0.2764076446519172, + "f1_weighted": 0.2812094799510077 + }, + { + "accuracy": 0.272, + "f1": 0.25965981472105015, + "f1_weighted": 0.2767962794823585 + }, + { + "accuracy": 0.276, + "f1": 0.2743098795760927, + "f1_weighted": 0.290230759145092 + } + ] + }, + { + "accuracy": 0.35487500000000005, + "f1": 0.33697352910373235, + "f1_weighted": 0.3601622467783322, + "hf_subset": "bew", + "languages": [ + "bew-Latn" + ], + "main_score": 0.33697352910373235, + "scores_per_experiment": [ + { + "accuracy": 0.37125, + "f1": 0.3540077335153401, + "f1_weighted": 0.39137483966920933 + }, + { + "accuracy": 0.32875, + "f1": 0.3160142526344732, + "f1_weighted": 0.3259161815852832 + }, + { + "accuracy": 0.34, + "f1": 0.31817166994834045, + "f1_weighted": 0.3542369525513665 + }, + { + "accuracy": 0.29875, + "f1": 0.292564916118992, + "f1_weighted": 0.30966649088408155 + }, + { + "accuracy": 0.35, + "f1": 0.32293996085209403, + "f1_weighted": 0.35243611573882055 + }, + { + "accuracy": 0.43, + "f1": 0.40910086719682504, + "f1_weighted": 0.4334252155593172 + }, + { + "accuracy": 0.3475, + "f1": 0.31521303287394264, + "f1_weighted": 0.34742462579045 + }, + { + "accuracy": 0.34875, + "f1": 0.3517307789872814, + "f1_weighted": 0.3469190752215223 + }, + { + "accuracy": 0.34375, + "f1": 0.32420658665204655, + "f1_weighted": 0.34431894374713173 + }, + { + "accuracy": 0.39, + "f1": 0.3657854922579878, + "f1_weighted": 0.3959040270361394 + } + ] + }, + { + "accuracy": 0.32133333333333336, + "f1": 0.2664925322431652, + "f1_weighted": 0.3175271605763864, + "hf_subset": "bug", + "languages": [ + "bug-Latn" + ], + "main_score": 0.2664925322431652, + "scores_per_experiment": [ + { + "accuracy": 0.33, + "f1": 0.2690694242677827, + "f1_weighted": 0.32893867770269963 + }, + { + "accuracy": 0.29, + "f1": 0.23934870533781605, + "f1_weighted": 0.2872312129626104 + }, + { + "accuracy": 0.34, + "f1": 0.27303276055894815, + "f1_weighted": 0.3342038219082274 + }, + { + "accuracy": 0.3566666666666667, + "f1": 0.2874656532634115, + "f1_weighted": 0.34913223877981514 + }, + { + "accuracy": 0.30333333333333334, + "f1": 0.2559630348617153, + "f1_weighted": 0.30848225085615355 + }, + { + "accuracy": 0.34, + "f1": 0.28628676608727677, + "f1_weighted": 0.3352255880289301 + }, + { + "accuracy": 0.2866666666666667, + "f1": 0.24670223565283042, + "f1_weighted": 0.2808510152948773 + }, + { + "accuracy": 0.2833333333333333, + "f1": 0.24865305923374081, + "f1_weighted": 0.27647516313911474 + }, + { + "accuracy": 0.32666666666666666, + "f1": 0.27056642764881217, + "f1_weighted": 0.3258835232970632 + }, + { + "accuracy": 0.3566666666666667, + "f1": 0.28783725551931855, + "f1_weighted": 0.34884811379437236 + } + ] + }, + { + "accuracy": 0.22849999999999998, + "f1": 0.2238462715459189, + "f1_weighted": 0.23244042135604306, + "hf_subset": "jav", + "languages": [ + "jav-Latn" + ], + "main_score": 0.2238462715459189, + "scores_per_experiment": [ + { + "accuracy": 0.22625, + "f1": 0.22477912752698653, + "f1_weighted": 0.2268541165610537 + }, + { + "accuracy": 0.2175, + "f1": 0.2186960101249864, + "f1_weighted": 0.22566634543544642 + }, + { + "accuracy": 0.1975, + "f1": 0.1879092099035058, + "f1_weighted": 0.20343751167561647 + }, + { + "accuracy": 0.26625, + "f1": 0.26419870970111176, + "f1_weighted": 0.2717422574236455 + }, + { + "accuracy": 0.215, + "f1": 0.20979841378466718, + "f1_weighted": 0.2062623277473962 + }, + { + "accuracy": 0.24125, + "f1": 0.2326430941053767, + "f1_weighted": 0.24863365850820407 + }, + { + "accuracy": 0.21875, + "f1": 0.2197192231925285, + "f1_weighted": 0.21640056865489501 + }, + { + "accuracy": 0.2225, + "f1": 0.21311042980527248, + "f1_weighted": 0.22436332722932423 + }, + { + "accuracy": 0.2275, + "f1": 0.22091719690039796, + "f1_weighted": 0.24580931109901363 + }, + { + "accuracy": 0.2525, + "f1": 0.2466913004143557, + "f1_weighted": 0.2552347892258354 + } + ] + }, + { + "accuracy": 0.21980000000000005, + "f1": 0.21192658413285695, + "f1_weighted": 0.22727227650014284, + "hf_subset": "mad", + "languages": [ + "mad-Latn" + ], + "main_score": 0.21192658413285695, + "scores_per_experiment": [ + { + "accuracy": 0.208, + "f1": 0.18511888176376187, + "f1_weighted": 0.21332821233845325 + }, + { + "accuracy": 0.224, + "f1": 0.2127281677917643, + "f1_weighted": 0.2346192022511952 + }, + { + "accuracy": 0.194, + "f1": 0.17373676326757032, + "f1_weighted": 0.2088309335083741 + }, + { + "accuracy": 0.23, + "f1": 0.23155689321755518, + "f1_weighted": 0.23894340228538488 + }, + { + "accuracy": 0.218, + "f1": 0.23053279263131268, + "f1_weighted": 0.22273361807837916 + }, + { + "accuracy": 0.204, + "f1": 0.19761834854393823, + "f1_weighted": 0.224314713296677 + }, + { + "accuracy": 0.208, + "f1": 0.20984782820267348, + "f1_weighted": 0.20950001880163593 + }, + { + "accuracy": 0.21, + "f1": 0.2002282237144761, + "f1_weighted": 0.2029563737538714 + }, + { + "accuracy": 0.25, + "f1": 0.24298470899655614, + "f1_weighted": 0.2584185609503066 + }, + { + "accuracy": 0.252, + "f1": 0.23491323319896099, + "f1_weighted": 0.2590777297371507 + } + ] + }, + { + "accuracy": 0.2638, + "f1": 0.2578358903262782, + "f1_weighted": 0.2661928065319922, + "hf_subset": "mak", + "languages": [ + "mak-Latn" + ], + "main_score": 0.2578358903262782, + "scores_per_experiment": [ + { + "accuracy": 0.248, + "f1": 0.24069288406861938, + "f1_weighted": 0.2447664507915594 + }, + { + "accuracy": 0.284, + "f1": 0.27058688367620015, + "f1_weighted": 0.27673645243283373 + }, + { + "accuracy": 0.272, + "f1": 0.2741225301737858, + "f1_weighted": 0.276621274436082 + }, + { + "accuracy": 0.314, + "f1": 0.30740488258621973, + "f1_weighted": 0.321737645849929 + }, + { + "accuracy": 0.252, + "f1": 0.25614318446498197, + "f1_weighted": 0.24971459967780393 + }, + { + "accuracy": 0.246, + "f1": 0.24137243318728094, + "f1_weighted": 0.25596496053060297 + }, + { + "accuracy": 0.266, + "f1": 0.2626051752995075, + "f1_weighted": 0.26861546664457053 + }, + { + "accuracy": 0.238, + "f1": 0.22604490798811452, + "f1_weighted": 0.2496284185403417 + }, + { + "accuracy": 0.25, + "f1": 0.24253494248571034, + "f1_weighted": 0.25078819155699467 + }, + { + "accuracy": 0.268, + "f1": 0.2568510793323614, + "f1_weighted": 0.26735460485920354 + } + ] + }, + { + "accuracy": 0.331625, + "f1": 0.31203752528160794, + "f1_weighted": 0.3375791706110565, + "hf_subset": "min", + "languages": [ + "min-Latn" + ], + "main_score": 0.31203752528160794, + "scores_per_experiment": [ + { + "accuracy": 0.33375, + "f1": 0.32358589078715616, + "f1_weighted": 0.34746891132693414 + }, + { + "accuracy": 0.29875, + "f1": 0.28355913142117073, + "f1_weighted": 0.2909612089632645 + }, + { + "accuracy": 0.32375, + "f1": 0.3071041577833395, + "f1_weighted": 0.3292705758622453 + }, + { + "accuracy": 0.3225, + "f1": 0.2931790543044436, + "f1_weighted": 0.33925343908735384 + }, + { + "accuracy": 0.33125, + "f1": 0.31628774219425376, + "f1_weighted": 0.3424427529903656 + }, + { + "accuracy": 0.29375, + "f1": 0.2818082503741261, + "f1_weighted": 0.2953881319707134 + }, + { + "accuracy": 0.31, + "f1": 0.3003831770748599, + "f1_weighted": 0.3072058319425441 + }, + { + "accuracy": 0.3725, + "f1": 0.33535589304540814, + "f1_weighted": 0.3807685347860339 + }, + { + "accuracy": 0.34125, + "f1": 0.3231075938748288, + "f1_weighted": 0.3555907475538145 + }, + { + "accuracy": 0.38875, + "f1": 0.3560043619564926, + "f1_weighted": 0.38744157162729587 + } + ] + }, + { + "accuracy": 0.4692499999999999, + "f1": 0.4303684046398549, + "f1_weighted": 0.4700613369853478, + "hf_subset": "mui", + "languages": [ + "mui-Latn" + ], + "main_score": 0.4303684046398549, + "scores_per_experiment": [ + { + "accuracy": 0.515, + "f1": 0.4563762789603358, + "f1_weighted": 0.5130766633725024 + }, + { + "accuracy": 0.5025, + "f1": 0.45901235586400585, + "f1_weighted": 0.5058057433498943 + }, + { + "accuracy": 0.4925, + "f1": 0.45595602425671666, + "f1_weighted": 0.49693218586812554 + }, + { + "accuracy": 0.48, + "f1": 0.4415106364975855, + "f1_weighted": 0.48154788019378564 + }, + { + "accuracy": 0.455, + "f1": 0.41773898927582725, + "f1_weighted": 0.45784476720360373 + }, + { + "accuracy": 0.4375, + "f1": 0.40889437612984875, + "f1_weighted": 0.44016082019818187 + }, + { + "accuracy": 0.4775, + "f1": 0.4290062891935917, + "f1_weighted": 0.47653765372639895 + }, + { + "accuracy": 0.48, + "f1": 0.4369830693147803, + "f1_weighted": 0.4814515517428867 + }, + { + "accuracy": 0.435, + "f1": 0.39401451572891144, + "f1_weighted": 0.435000304604335 + }, + { + "accuracy": 0.4175, + "f1": 0.4041915111769459, + "f1_weighted": 0.41225579959376396 + } + ] + }, + { + "accuracy": 0.29733333333333334, + "f1": 0.2762887415609376, + "f1_weighted": 0.2978179318127897, + "hf_subset": "rej", + "languages": [ + "rej-Latn" + ], + "main_score": 0.2762887415609376, + "scores_per_experiment": [ + { + "accuracy": 0.2833333333333333, + "f1": 0.25604603411886484, + "f1_weighted": 0.29245780306351704 + }, + { + "accuracy": 0.30333333333333334, + "f1": 0.28113053427750806, + "f1_weighted": 0.3010135394805909 + }, + { + "accuracy": 0.32, + "f1": 0.2971136531022927, + "f1_weighted": 0.3262235990075212 + }, + { + "accuracy": 0.2966666666666667, + "f1": 0.2727037307899841, + "f1_weighted": 0.29903655444221483 + }, + { + "accuracy": 0.28, + "f1": 0.2645968765718886, + "f1_weighted": 0.288615503042191 + }, + { + "accuracy": 0.26666666666666666, + "f1": 0.24737017825172716, + "f1_weighted": 0.2570440434967498 + }, + { + "accuracy": 0.31, + "f1": 0.2943694084415235, + "f1_weighted": 0.32064680156428677 + }, + { + "accuracy": 0.30333333333333334, + "f1": 0.2906138666667327, + "f1_weighted": 0.30399507201423165 + }, + { + "accuracy": 0.26666666666666666, + "f1": 0.2434507358645241, + "f1_weighted": 0.2570368114424851 + }, + { + "accuracy": 0.3433333333333333, + "f1": 0.3154923975243301, + "f1_weighted": 0.3321095905741091 + } + ] + }, + { + "accuracy": 0.22737500000000002, + "f1": 0.22201108018236107, + "f1_weighted": 0.2268334566577134, + "hf_subset": "sun", + "languages": [ + "sun-Latn" + ], + "main_score": 0.22201108018236107, + "scores_per_experiment": [ + { + "accuracy": 0.26375, + "f1": 0.26468537010811055, + "f1_weighted": 0.27241507710618595 + }, + { + "accuracy": 0.215, + "f1": 0.21243401329304484, + "f1_weighted": 0.21401278307641203 + }, + { + "accuracy": 0.25, + "f1": 0.24041892287793698, + "f1_weighted": 0.24992888730438692 + }, + { + "accuracy": 0.24, + "f1": 0.2437984335090524, + "f1_weighted": 0.2470727529412417 + }, + { + "accuracy": 0.1975, + "f1": 0.1916703232959061, + "f1_weighted": 0.19990974511107013 + }, + { + "accuracy": 0.2225, + "f1": 0.20044414068164243, + "f1_weighted": 0.22217627866958062 + }, + { + "accuracy": 0.21875, + "f1": 0.2233498238865721, + "f1_weighted": 0.22554046966128205 + }, + { + "accuracy": 0.22875, + "f1": 0.22585718909251032, + "f1_weighted": 0.22904241320467095 + }, + { + "accuracy": 0.21625, + "f1": 0.19565547010330434, + "f1_weighted": 0.19907489223174352 + }, + { + "accuracy": 0.22125, + "f1": 0.22179711497553087, + "f1_weighted": 0.20916126727055986 + } + ] + } + ] + }, + "task_name": "NusaParagraphEmotionClassification" +} \ No newline at end of file