From 14701694779b5fe0f4bd4d6604877b73e1d23ef7 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Mon, 29 Jan 2024 10:50:57 +0100 Subject: [PATCH] Update language identification function to return language probabilities --- mltb2/fasttext.py | 2 +- tests/test_fasttext.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/mltb2/fasttext.py b/mltb2/fasttext.py index 1eb758f..3d7e042 100644 --- a/mltb2/fasttext.py +++ b/mltb2/fasttext.py @@ -57,7 +57,7 @@ def __call__(self, text: str, num_lang: int = 10, always_detect_lang: Optional[L Args: text: the text for which the language is to be recognized - num_lang: number of returned languages + num_lang: number of returned language probabilities always_detect_lang: A list of languages that should always be returned even if not detected. If the language is not detected, the probability is set to 0.0. diff --git a/tests/test_fasttext.py b/tests/test_fasttext.py index bb4037e..3dd7d8e 100644 --- a/tests/test_fasttext.py +++ b/tests/test_fasttext.py @@ -20,10 +20,7 @@ def test_fasttext_language_identification_call(): def test_fasttext_language_identification_call_with_always_detect_lang(): language_identification = FastTextLanguageIdentification() - languages = language_identification("This is an English sentence.") + languages = language_identification("This is an English sentence.", always_detect_lang=["fake_language"]) assert languages is not None - assert len(languages) == 10 - languages_with_de = language_identification("This is an English sentence.", always_detect_lang=["de"]) - assert languages_with_de is not None - assert len(languages_with_de) == 11 - assert "de" in languages_with_de + assert len(languages) == 11 + assert "fake_language" in languages