Add DBpediaClassification task (English topic classification on DBpedia14)

What this patch does:
  * Adds mteb/tasks/Classification/eng/DBpediaClassification.py, a new
    AbsTaskClassification subclass. It loads "fancyzhx/dbpedia_14" at a
    pinned revision, renames the "content" column to "text", and applies
    stratified subsampling (seeded with self.seed) to the "train" and
    "test" splits. Only "test" is listed in eval_splits.
  * Registers the task in mteb/tasks/Classification/__init__.py.
  * Records contributor points in docs/mmteb/points/501.jsonl.
  * Adds result files for the task under
    results/intfloat__multilingual-e5-small/ and
    results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/.

Review notes (nothing below is changed by this patch text):
  * NOTE(review): n_samples gives 70000 for "test", but dataset_transform
    subsamples that split, and both committed accuracies are exact
    multiples of 1/20480. That suggests far fewer than 70000 test samples
    were evaluated; confirm whether n_samples and avg_character_length
    should describe the subsampled split.
  * NOTE(review): date is ("2022-01-25", "2022-01-25") while the cited
    paper is from 2015; confirm the intended date range.
  * NOTE(review): the BibTeX entry has an empty "pages = {}" field.
  * Line breaks and indentation were reconstructed from a
    whitespace-collapsed copy of the patch; the blob hashes on the
    "index" lines are those recorded in that copy.

diff --git a/docs/mmteb/points/501.jsonl b/docs/mmteb/points/501.jsonl
new file mode 100644
index 0000000000..f72a055c63
--- /dev/null
+++ b/docs/mmteb/points/501.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "dipam7", "New dataset": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "imenelydiaker", "Review PR": 2}
\ No newline at end of file
diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py
index e9a6fae407..9612006b27 100644
--- a/mteb/tasks/Classification/__init__.py
+++ b/mteb/tasks/Classification/__init__.py
@@ -24,6 +24,7 @@
 from .eng.ContractNLIExplicitIdentificationLegalBenchClassification import *
 from .eng.ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification import *
 from .eng.ContractNLILimitedUseLegalBenchClassification import *
+from .eng.DBpediaClassification import *
 from .eng.EmotionClassification import *
 from .eng.FinancialPhrasebankClassification import *
 from .eng.ImdbClassification import *
diff --git a/mteb/tasks/Classification/eng/DBpediaClassification.py b/mteb/tasks/Classification/eng/DBpediaClassification.py
new file mode 100644
index 0000000000..3db1016d04
--- /dev/null
+++ b/mteb/tasks/Classification/eng/DBpediaClassification.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+from ....abstasks import AbsTaskClassification
+
+
+class DBpediaClassification(AbsTaskClassification):
+    metadata = TaskMetadata(
+        name="DBpediaClassification",
+        description="DBpedia14 is a dataset of English texts from Wikipedia articles, categorized into 14 non-overlapping classes based on their DBpedia ontology.",
+        reference="https://arxiv.org/abs/1509.01626",
+        dataset={
+            "path": "fancyzhx/dbpedia_14",
+            "revision": "9abd46cf7fc8b4c64290f26993c540b92aa145ac",
+        },
+        type="Classification",
+        category="s2s",
+        eval_splits=["test"],
+        eval_langs=["eng-Latn"],
+        main_score="accuracy",
+        date=("2022-01-25", "2022-01-25"),
+        form=["written"],
+        domains=["Encyclopaedic"],
+        task_subtypes=["Topic classification"],
+        license="cc-by-sa-3.0",
+        socioeconomic_status="low",
+        annotations_creators="derived",
+        dialect=[],
+        text_creation="found",
+        bibtex_citation="""
+            @inproceedings{NIPS2015_250cf8b5,
+            author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann},
+            booktitle = {Advances in Neural Information Processing Systems},
+            editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett},
+            pages = {},
+            publisher = {Curran Associates, Inc.},
+            title = {Character-level Convolutional Networks for Text Classification},
+            url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/250cf8b51c773f3f8dc8b4be867a9a02-Paper.pdf},
+            volume = {28},
+            year = {2015}
+            }
+        """,
+        n_samples={"test": 70000},
+        avg_character_length={"test": 281.40},
+    )
+
+    def dataset_transform(self):
+        self.dataset = self.dataset.rename_column("content", "text")
+        self.dataset = self.stratified_subsampling(
+            self.dataset, seed=self.seed, splits=["train", "test"]
+        )
diff --git a/results/intfloat__multilingual-e5-small/DBpediaClassification.json b/results/intfloat__multilingual-e5-small/DBpediaClassification.json
new file mode 100644
index 0000000000..4929a2144d
--- /dev/null
+++ b/results/intfloat__multilingual-e5-small/DBpediaClassification.json
@@ -0,0 +1,13 @@
+{
+  "dataset_revision": "9abd46cf7fc8b4c64290f26993c540b92aa145ac",
+  "mteb_dataset_name": "DBpediaClassification",
+  "mteb_version": "1.6.36",
+  "test": {
+    "accuracy": 0.871435546875,
+    "accuracy_stderr": 0.007089500734212557,
+    "evaluation_time": 38.69,
+    "f1": 0.863423609172077,
+    "f1_stderr": 0.008270870625325584,
+    "main_score": 0.871435546875
+  }
+}
\ No newline at end of file
diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/DBpediaClassification.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/DBpediaClassification.json
new file mode 100644
index 0000000000..f8005d1427
--- /dev/null
+++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/DBpediaClassification.json
@@ -0,0 +1,13 @@
+{
+  "dataset_revision": "9abd46cf7fc8b4c64290f26993c540b92aa145ac",
+  "mteb_dataset_name": "DBpediaClassification",
+  "mteb_version": "1.6.36",
+  "test": {
+    "accuracy": 0.850830078125,
+    "accuracy_stderr": 0.013677541375994482,
+    "evaluation_time": 34.75,
+    "f1": 0.8482267879321842,
+    "f1_stderr": 0.01348333759139074,
+    "main_score": 0.850830078125
+  }
+}
\ No newline at end of file