Skip to content

Commit

Permalink
fix: Add NusaParagraph Emotion Classification (#928)
Browse files Browse the repository at this point in the history
* add NusaTranslationEmotionClassification

* update name

* add new task

* add new task

* Update mteb/tasks/Classification/multilingual/NusaParagraphEmotionClassification.py

Co-authored-by: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>

* add sizes

* add point

* add results

* update desc

---------

Co-authored-by: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
  • Loading branch information
gentaiscool and KennethEnevoldsen authored Jun 15, 2024
1 parent 0f51819 commit 5e4ad44
Show file tree
Hide file tree
Showing 6 changed files with 1,326 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/mmteb/points/928.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"GitHub": "gentaiscool", "New dataset": 2}
{"GitHub": "KennethEnevoldsen", "Review PR": 2}
1 change: 1 addition & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"Cross-Lingual Semantic Discrimination",
"Textual Entailment",
"Counterfactual Detection",
"Emotion classification",
]

TASK_DOMAIN = Literal[
Expand Down
1 change: 1 addition & 0 deletions mteb/tasks/Classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
from .multilingual.MultilingualSentimentClassification import *
from .multilingual.NaijaSenti import *
from .multilingual.NordicLangClassification import *
from .multilingual.NusaParagraphEmotionClassification import *
from .multilingual.NusaParagraphTopicClassification import *
from .multilingual.NusaXSenti import *
from .multilingual.ScalaClassification import *
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import AbsTaskClassification, MultilingualTask

# Evaluation languages for the task, passed as ``eval_langs`` below.
# Each key maps to a one-element list holding a "<language>-<script>" tag;
# every entry here uses the Latin script ("Latn").
# NOTE(review): keys are presumably the dataset's subset/config names -- confirm
# against the dataset card.
_LANGUAGES = {
    # The only entry whose key ("btk") differs from its language tag ("bbc").
    "btk": ["bbc-Latn"],  # Batak Toba
    "bew": ["bew-Latn"],  # Betawi
    "bug": ["bug-Latn"],  # Buginese
    "jav": ["jav-Latn"],  # Javanese
    "mad": ["mad-Latn"],  # Madurese
    "mak": ["mak-Latn"],  # Makasar
    "min": ["min-Latn"],  # Minangkabau
    "mui": ["mui-Latn"],  # Musi
    "rej": ["rej-Latn"],  # Rejang
    "sun": ["sun-Latn"],  # Sundanese
}


class NusaParagraphEmotionClassification(MultilingualTask, AbsTaskClassification):
    """Multilingual emotion-classification task built on the NusaParagraph data.

    The class is purely declarative: it defines a single ``metadata``
    attribute and adds no methods of its own, so all behaviour comes from
    the ``MultilingualTask`` and ``AbsTaskClassification`` base classes.
    """

    metadata = TaskMetadata(
        name="NusaParagraphEmotionClassification",
        # Dataset identifier together with the exact revision hash to use.
        dataset={
            "path": "gentaiscool/nusaparagraph_emot",
            "revision": "c61e8c3ee47d2dce296e9601195916b54c21d575",
        },
        description="NusaParagraphEmotionClassification is a multi-class emotion classification on 10 Indonesian languages from the NusaParagraph dataset.",
        reference="https://github.com/IndoNLP/nusa-writes",
        # --- Task definition and evaluation setup ---
        category="s2s",
        type="Classification",
        eval_splits=["test"],
        eval_langs=_LANGUAGES,
        main_score="f1",
        # --- Descriptive information about the data ---
        date=("2021-08-01", "2022-07-01"),
        form=["written"],
        domains=["Non-fiction", "Fiction"],
        task_subtypes=["Emotion classification"],
        license="Apache 2.0",
        socioeconomic_status="mixed",
        annotations_creators="human-annotated",
        dialect=[],
        text_creation="found",
        # The citation text is kept flush-left on purpose: any indentation
        # inside the triple-quoted literal would become part of the string.
        bibtex_citation="""
@inproceedings{cahyawijaya-etal-2023-nusawrites,
title = "NusaWrites: Constructing High-Quality Corpora for Underrepresented and Extremely Low-Resource Languages",
author = "Cahyawijaya, Samuel and Lovenia, Holy and Koto, Fajri and Adhista, Dea and Dave, Emmanuel and Oktavianti, Sarah and Akbar, Salsabil and Lee, Jhonson and Shadieq, Nuur and Cenggoro, Tjeng Wawan and Linuwih, Hanung and Wilie, Bryan and Muridan, Galih and Winata, Genta and Moeljadi, David and Aji, Alham Fikri and Purwarianti, Ayu and Fung, Pascale",
editor = "Park, Jong C. and Arase, Yuki and Hu, Baotian and Lu, Wei and Wijaya, Derry and Purwarianti, Ayu and Krisnadhi, Adila Alfa",
booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = nov,
year = "2023",
address = "Nusa Dua, Bali",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.ijcnlp-main.60",
pages = "921--945",
}
""",
        # --- Per-split statistics ---
        n_samples={"train": 15516, "validation": 2948, "test": 6250},
        avg_character_length={"train": 740.24, "validation": 740.66, "test": 740.71},
    )
Loading

0 comments on commit 5e4ad44

Please sign in to comment.