Skip to content

Commit

Permalink
fix: Update annotations for multilingual classification tasks (#923)
Browse files (browse the repository at this point in the history)
* Update MTOPIntentClassification.py

* Update MTOPDomainClassification.py

* Update MassiveIntentClassification.py

* Update MassiveScenarioClassification.py

* Update MassiveScenarioClassification.py

* Update MassiveIntentClassification.py

* add points

---------

Co-authored-by: Tikhonova Maria <m_tikhonova94@mail.ru>
  • Loading branch information
artemsnegirev and MariyaTikhonova authored Jun 15, 2024
1 parent bd025a2 commit 568651b
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 36 deletions.
5 changes: 5 additions & 0 deletions docs/mmteb/points/923.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"GitHub": "MariyaTikhonova", "Dataset annotations": 1}
{"GitHub": "anpalmak2003", "Dataset annotations": 1}
{"GitHub": "ab1992ao", "Dataset annotations": 1}
{"GitHub": "Alenush", "Dataset annotations": 1}
{"GitHub": "KennethEnevoldsen", "Review PR": 2}
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ class MTOPDomainClassification(MultilingualTask, AbsTaskClassification):
eval_splits=["validation", "test"],
eval_langs=_LANGUAGES,
main_score="accuracy",
- date=None,
- form=None,
- domains=None,
- task_subtypes=None,
- license=None,
- socioeconomic_status=None,
- annotations_creators=None,
- dialect=None,
- text_creation=None,
+ date=("2020-01-01", "2020-12-31"),
+ form=["spoken"],
+ domains=["Spoken"],
+ task_subtypes=[],
+ license="Not specified",
+ socioeconomic_status="mixed",
+ annotations_creators="human-annotated",
+ dialect=[],
+ text_creation="created",
bibtex_citation="""@inproceedings{li-etal-2021-mtop,
title = "{MTOP}: A Comprehensive Multilingual Task-Oriented Semantic Parsing Benchmark",
author = "Li, Haoran and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification):
eval_splits=["validation", "test"],
eval_langs=_LANGUAGES,
main_score="accuracy",
- date=None,
- form=None,
- domains=None,
- task_subtypes=None,
- license=None,
- socioeconomic_status=None,
- annotations_creators=None,
- dialect=None,
- text_creation=None,
+ date=("2020-01-01", "2020-12-31"),
+ form=["spoken"],
+ domains=["Spoken"],
+ task_subtypes=[],
+ license="Not specified",
+ socioeconomic_status="mixed",
+ annotations_creators="human-annotated",
+ dialect=[],
+ text_creation="created",
bibtex_citation="""@inproceedings{li-etal-2021-mtop,
title = "{MTOP}: A Comprehensive Multilingual Task-Oriented Semantic Parsing Benchmark",
author = "Li, Haoran and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification):
eval_splits=["validation", "test"],
eval_langs=_LANGUAGES,
main_score="accuracy",
- date=None,
- form=None,
- domains=None,
- task_subtypes=None,
- license=None,
- socioeconomic_status=None,
- annotations_creators=None,
- dialect=None,
- text_creation=None,
+ date=("2022-01-01", "2022-04-22"),
+ form=["spoken"],
+ domains=["Spoken"],
+ task_subtypes=[],
+ license="Apache 2.0",
+ socioeconomic_status="mixed",
+ annotations_creators="human-annotated",
+ dialect=[],
+ text_creation="created",
bibtex_citation="""@misc{fitzgerald2022massive,
title={MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages},
author={Jack FitzGerald and Christopher Hench and Charith Peris and Scott Mackie and Kay Rottmann and Ana Sanchez and Aaron Nash and Liam Urbach and Vishesh Kakarala and Richa Singh and Swetha Ranganath and Laurie Crist and Misha Britan and Wouter Leeuwis and Gokhan Tur and Prem Natarajan},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification):
eval_splits=["validation", "test"],
eval_langs=_LANGUAGES,
main_score="accuracy",
- date=None,
- form=None,
- domains=None,
- task_subtypes=None,
- license=None,
- socioeconomic_status=None,
- annotations_creators=None,
- dialect=None,
- text_creation=None,
+ date=("2022-01-01", "2022-04-22"),
+ form=["spoken"],
+ domains=["Spoken"],
+ task_subtypes=[],
+ license="Apache 2.0",
+ socioeconomic_status="mixed",
+ annotations_creators="human-annotated",
+ dialect=[],
+ text_creation="created",
bibtex_citation="""@misc{fitzgerald2022massive,
title={MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages},
author={Jack FitzGerald and Christopher Hench and Charith Peris and Scott Mackie and Kay Rottmann and Ana Sanchez and Aaron Nash and Liam Urbach and Vishesh Kakarala and Richa Singh and Swetha Ranganath and Laurie Crist and Misha Britan and Wouter Leeuwis and Gokhan Tur and Prem Natarajan},
Expand Down

0 comments on commit 568651b

Please sign in to comment.