diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index f11823a0a0..3599346846 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -43,9 +43,7 @@ def task_to_markdown_row(task: mteb.AbsTask) -> str: name_w_reference += author_from_bibtex(task.metadata.bibtex_citation) - languages = sorted(list(task.metadata.languages)) - - return f"| {name_w_reference} | {languages} | {task.metadata.type} | {task.metadata.category} | {domains} | {n_samples} | {avg_character_length} |" + return f"| {name_w_reference} | {task.metadata.languages} | {task.metadata.type} | {task.metadata.category} | {domains} | {n_samples} | {avg_character_length} |" def create_tasks_table(tasks: list[mteb.AbsTask]) -> str: diff --git a/docs/mmteb/points/578.jsonl b/docs/mmteb/points/578.jsonl new file mode 100644 index 0000000000..087efc1143 --- /dev/null +++ b/docs/mmteb/points/578.jsonl @@ -0,0 +1,2 @@ +{"GitHub": "isaac-chung", "Review PR": 2} +{"GitHub": "antoniolanza1996", "Bug fixes": 2} \ No newline at end of file diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 9d3cf6a34e..79874d294a 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -99,7 +99,7 @@ def evaluate(self, model, split="test"): raise NotImplementedError @property - def languages(self) -> set[str]: + def languages(self) -> list[str]: """Returns the languages of the task""" return self.metadata.languages @@ -110,7 +110,7 @@ def __repr__(self) -> str: """ langs = self.languages if len(langs) > 3: - langs = list(langs)[:3] + langs = langs[:3] langs.append("...") return ( f"{self.__class__.__name__}(name='{self.metadata.name}', languages={langs})" diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index d03cdc764a..563606e52f 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -235,17 +235,21 @@ def _check_language_code(code): ) @property - def languages(self) -> set[str]: + def languages(self) -> list[str]: """Return the languages of the dataset as iso639-3 codes.""" def get_lang(lang: str) -> str: return lang.split("-")[0] if isinstance(self.eval_langs, dict): - return set( - get_lang(lang) for langs in self.eval_langs.values() for lang in langs + return sorted( + set( + get_lang(lang) + for langs in self.eval_langs.values() + for lang in langs + ) ) - return set(sorted([get_lang(lang) for lang in self.eval_langs])) + return sorted(set([get_lang(lang) for lang in self.eval_langs])) @property def scripts(self) -> set[str]: