Skip to content

Commit

Permalink
fix: languages is a sorted list now (#578)
Browse files Browse the repository at this point in the history
* refactor: languages is a sorted list now

* refactor: fix linting

* remove newline

---------

Co-authored-by: Isaac Chung <chungisaac1217@gmail.com>
  • Loading branch information
antoniolanza1996 and isaac-chung authored Apr 26, 2024
1 parent 20a1ca4 commit adcc8c6
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 9 deletions.
4 changes: 1 addition & 3 deletions docs/create_tasks_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def task_to_markdown_row(task: mteb.AbsTask) -> str:

name_w_reference += author_from_bibtex(task.metadata.bibtex_citation)

languages = sorted(list(task.metadata.languages))

return f"| {name_w_reference} | {languages} | {task.metadata.type} | {task.metadata.category} | {domains} | {n_samples} | {avg_character_length} |"
return f"| {name_w_reference} | {task.metadata.languages} | {task.metadata.type} | {task.metadata.category} | {domains} | {n_samples} | {avg_character_length} |"


def create_tasks_table(tasks: list[mteb.AbsTask]) -> str:
Expand Down
2 changes: 2 additions & 0 deletions docs/mmteb/points/578.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"GitHub": "isaac-chung", "Review PR": 2}
{"GitHub": "antoniolanza1996", "Bug fixes": 2}
4 changes: 2 additions & 2 deletions mteb/abstasks/AbsTask.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def evaluate(self, model, split="test"):
raise NotImplementedError

@property
def languages(self) -> set[str]:
def languages(self) -> list[str]:
"""Returns the languages of the task"""
return self.metadata.languages

Expand All @@ -110,7 +110,7 @@ def __repr__(self) -> str:
"""
langs = self.languages
if len(langs) > 3:
langs = list(langs)[:3]
langs = langs[:3]
langs.append("...")
return (
f"{self.__class__.__name__}(name='{self.metadata.name}', languages={langs})"
Expand Down
12 changes: 8 additions & 4 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,17 +235,21 @@ def _check_language_code(code):
)

@property
def languages(self) -> set[str]:
def languages(self) -> list[str]:
"""Return the languages of the dataset as iso639-3 codes."""

def get_lang(lang: str) -> str:
return lang.split("-")[0]

if isinstance(self.eval_langs, dict):
return set(
get_lang(lang) for langs in self.eval_langs.values() for lang in langs
return sorted(
set(
get_lang(lang)
for langs in self.eval_langs.values()
for lang in langs
)
)
return set(sorted([get_lang(lang) for lang in self.eval_langs]))
return sorted(set([get_lang(lang) for lang in self.eval_langs]))

@property
def scripts(self) -> set[str]:
Expand Down

0 comments on commit adcc8c6

Please sign in to comment.