Skip to content

Commit

Permalink
Update language identification function to return language probabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipMay committed Jan 29, 2024
1 parent dae8c67 commit 1470169
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 7 deletions.
2 changes: 1 addition & 1 deletion mltb2/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __call__(self, text: str, num_lang: int = 10, always_detect_lang: Optional[L
Args:
text: the text for which the language is to be recognized
- num_lang: number of returned languages
+ num_lang: number of returned language probabilities
always_detect_lang: A list of languages that should always be returned
even if not detected. If the language is not detected, the probability
is set to 0.0.
Expand Down
9 changes: 3 additions & 6 deletions tests/test_fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ def test_fasttext_language_identification_call():

def test_fasttext_language_identification_call_with_always_detect_lang():
language_identification = FastTextLanguageIdentification()
- languages = language_identification("This is an English sentence.")
+ languages = language_identification("This is an English sentence.", always_detect_lang=["fake_language"])
assert languages is not None
- assert len(languages) == 10
- languages_with_de = language_identification("This is an English sentence.", always_detect_lang=["de"])
- assert languages_with_de is not None
- assert len(languages_with_de) == 11
- assert "de" in languages_with_de
+ assert len(languages) == 11
+ assert "fake_language" in languages

0 comments on commit 1470169

Please sign in to comment.