From 46287f31b6a407ba846868d829eb2e456ccbcb80 Mon Sep 17 00:00:00 2001 From: Martin Majlis Date: Mon, 20 Jan 2025 00:05:56 +0100 Subject: [PATCH 1/3] Make error messages more user friendly --- README.rst | 2 +- example.py | 20 +++++++----- tests/wikipedia_test.py | 55 +++++++++++++++++++++++++++++++- wikipediaapi/__init__.py | 68 ++++++++++++++++++++++++++++++---------- 4 files changed, 119 insertions(+), 26 deletions(-) diff --git a/README.rst b/README.rst index 59803b0..2741abb 100644 --- a/README.rst +++ b/README.rst @@ -44,7 +44,7 @@ To initialize it, you have to provide: .. code-block:: python import wikipediaapi - wiki_wiki = wikipediaapi.Wikipedia('MyProjectName (merlin@example.com)', 'en') + wiki_wiki = wikipediaapi.Wikipedia(user_agent='MyProjectName (merlin@example.com)', language='en') page_py = wiki_wiki.page('Python_(programming_language)') diff --git a/example.py b/example.py index 7ce4931..b78ddc9 100755 --- a/example.py +++ b/example.py @@ -7,7 +7,7 @@ user_agent = "Wikipedia-API Example (merlin@example.com)" -wiki_wiki = wikipediaapi.Wikipedia(user_agent, "en") +wiki_wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language="en") page_py = wiki_wiki.page("Python_(programming_language)") @@ -89,7 +89,7 @@ def print_categories(page): print("Page - Summary: %s" % page_nonexisting.summary[0:60]) -wiki_de = wikipediaapi.Wikipedia(user_agent, "de") +wiki_de = wikipediaapi.Wikipedia(user_agent=user_agent, language="de") de_page = wiki_de.page("Deutsche Sprache") print(de_page.title + ": " + de_page.fullurl) print(de_page.summary[0:60]) @@ -110,7 +110,7 @@ def print_categorymembers(categorymembers, level=0, max_level=2): print("Category members: Category:Physics") print_categorymembers(cat.categorymembers, max_level=1) -wiki_hi = wikipediaapi.Wikipedia(user_agent, "hi") +wiki_hi = wikipediaapi.Wikipedia(user_agent=user_agent, language="hi") # fetch page about Python in Hindu # https://hi.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8 @@ -122,28 +122,34 @@ def print_categorymembers(categorymembers, level=0, max_level=2): print(p_hi_python_quoted.summary[0:60]) # Fetch page about Python in Chinese -wiki_zh = wikipediaapi.Wikipedia(user_agent, "zh") +wiki_zh = wikipediaapi.Wikipedia(user_agent=user_agent, language="zh") zh_page = wiki_zh.page("Python") print(zh_page.title + ": " + zh_page.fullurl) print(zh_page.summary[0:60]) print(repr(zh_page.varianttitles)) # https://zh.wikipedia.org/zh-cn/Python -wiki_zh_cn = wikipediaapi.Wikipedia(user_agent, "zh", "zh-cn") +wiki_zh_cn = wikipediaapi.Wikipedia( + user_agent=user_agent, language="zh", variant="zh-cn" +) zh_page_cn = wiki_zh_cn.page("Python") print(zh_page_cn.title + ": " + zh_page_cn.fullurl) print(zh_page_cn.summary[0:60]) print(repr(zh_page_cn.varianttitles)) # https://zh.wikipedia.org/zh-tw/Python -wiki_zh_tw = wikipediaapi.Wikipedia(user_agent, "zh", "zh-tw") +wiki_zh_tw = wikipediaapi.Wikipedia( + user_agent=user_agent, language="zh", variant="zh-tw" +) zh_page_tw = wiki_zh_tw.page("Python") print(zh_page_tw.title + ": " + zh_page_tw.fullurl) print(zh_page_tw.summary[0:60]) print(repr(zh_page_tw.varianttitles)) # https://zh.wikipedia.org/zh-sg/Python -wiki_zh_sg = wikipediaapi.Wikipedia(user_agent, "zh", "zh-sg") +wiki_zh_sg = wikipediaapi.Wikipedia( + user_agent=user_agent, language="zh", variant="zh-sg" +) zh_page_sg = wiki_zh_sg.page("Python") print(zh_page_sg.title + ": " + zh_page_sg.fullurl) print(zh_page_sg.summary[0:60]) diff --git a/tests/wikipedia_test.py b/tests/wikipedia_test.py index 7613dbf..85d6e0e 100644 --- a/tests/wikipedia_test.py +++ b/tests/wikipedia_test.py @@ -13,11 +13,63 @@ def test_missing_user_agent_should_fail(self): AssertionError( "Please, be nice to Wikipedia and specify user agent - " + "https://meta.wikimedia.org/wiki/User-Agent_policy. " - + "Current user_agent: 'en' is not sufficient." + + "Current user_agent: 'en' is not sufficient. " + + "Use Wikipedia(user_agent='your-user-agent', language='en')" ) ), ) + def test_swapped_parameters_in_constructor(self): + with self.assertRaises(AssertionError) as e: + wikipediaapi.Wikipedia("en", "my-user-agent") + self.assertEqual( + str(e.exception), + str( + AssertionError( + "Please, be nice to Wikipedia and specify user agent - " + + "https://meta.wikimedia.org/wiki/User-Agent_policy. " + + "Current user_agent: 'en' is not sufficient. " + + "Use Wikipedia(user_agent='your-user-agent', language='en')" + ) + ), + ) + + def test_empty_parameters_in_constructor(self): + with self.assertRaises(AssertionError) as e: + wikipediaapi.Wikipedia("", "") + self.assertEqual( + str(e.exception), + str( + AssertionError( + "Please, be nice to Wikipedia and specify user agent - " + + "https://meta.wikimedia.org/wiki/User-Agent_policy. " + + "Current user_agent: '' is not sufficient. " + + "Use Wikipedia(user_agent='your-user-agent', language='your-language')" + ) + ), + ) + + def test_empty_language_in_constructor(self): + with self.assertRaises(AssertionError) as e: + wikipediaapi.Wikipedia("test-user-agent", "") + self.assertEqual( + str(e.exception), + str( + AssertionError( + "Specify language. Current language: '' is not sufficient. " + + "Use Wikipedia(user_agent='test-user-agent', language='your-language')" + ) + ), + ) + + def test_long_language_and_user_agent(self): + wiki = wikipediaapi.Wikipedia( + user_agent="param-user-agent", language="very-long-language" + ) + self.assertIsNotNone(wiki) + self.assertEqual(wiki.language, "very-long-language") + self.assertIsNone(wiki.variant) + def test_user_agent_is_used(self): wiki = wikipediaapi.Wikipedia( user_agent="param-user-agent", @@ -28,6 +80,7 @@ def test_user_agent_is_used(self): user_agent, "param-user-agent (" + wikipediaapi.USER_AGENT + ")", ) + self.assertEqual(wiki.language, "en") def test_user_agent_in_headers_is_fine(self): wiki = wikipediaapi.Wikipedia( diff --git a/wikipediaapi/__init__.py b/wikipediaapi/__init__.py index 8e9356c..3f53df1 100644 --- a/wikipediaapi/__init__.py +++ b/wikipediaapi/__init__.py @@ -23,6 +23,9 @@ + "; https://github.com/martin-majlis/Wikipedia-API/" ) +MIN_USER_AGENT_LEN = 5 +MAX_LANG_LEN = 5 + log = logging.getLogger(__name__) @@ -163,29 +166,21 @@ def __init__( request_kwargs.setdefault("timeout", 10.0) default_headers = {} if headers is None else headers - if user_agent: + if user_agent is not None: default_headers.setdefault( "User-Agent", user_agent, ) - used_user_agent = default_headers.get("User-Agent") - if not (used_user_agent and len(used_user_agent) > 5): - raise AssertionError( - "Please, be nice to Wikipedia and specify user agent - " - + "https://meta.wikimedia.org/wiki/User-Agent_policy. Current user_agent: '" - + str(used_user_agent) - + "' is not sufficient." - ) + used_language, used_variant, used_user_agent = self._check_and_correct_params( + language, + variant, + default_headers.get("User-Agent"), + ) + default_headers["User-Agent"] += " (" + USER_AGENT + ")" - self.language = language.strip().lower() - if not self.language: - raise AssertionError( - "Specify language. Current language: '" - + str(self.language) - + "' is not sufficient." - ) - self.variant = variant.strip().lower() if variant else variant + self.language = used_language + self.variant = used_variant self.extract_format = extract_format log.info( @@ -722,6 +717,45 @@ def _common_attributes(extract, page: "WikipediaPage"): if attr in extract: page._attributes[attr] = extract[attr] + @staticmethod + def _check_and_correct_params( + language: Optional[str], variant: Optional[str], user_agent: Optional[str] + ) -> tuple[str, Optional[str], str]: + if not user_agent or len(user_agent) < MIN_USER_AGENT_LEN: + raise AssertionError( + "Please, be nice to Wikipedia and specify user agent - " + + "https://meta.wikimedia.org/wiki/User-Agent_policy. Current user_agent: '" + + str(user_agent) + + "' is not sufficient. " + + "Use Wikipedia(user_agent='your-user-agent', language='" + + (str(user_agent) or "your-language") + + "')" + ) + + if not language: + raise AssertionError( + "Specify language. Current language: '" + + str(language) + + "' is not sufficient. " + + "Use Wikipedia(user_agent='" + + str(user_agent) + + "', language='your-language')" + ) + + used_language = language.strip().lower() + if len(used_language) > MAX_LANG_LEN: + log.warning( + "Used language '%s' is longer than %d. It is suspicious", + used_language, + MAX_LANG_LEN, + ) + + return ( + used_language, + variant.strip().lower() if variant else variant, + user_agent, + ) + class WikipediaPageSection: """WikipediaPageSection represents section in the page.""" From c13328bd50cc10a4500b63c3d61fa55bac5df80a Mon Sep 17 00:00:00 2001 From: Martin Majlis Date: Mon, 20 Jan 2025 00:15:55 +0100 Subject: [PATCH 2/3] Fix issues found by deepsource --- wikipediaapi/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/wikipediaapi/__init__.py b/wikipediaapi/__init__.py index 3f53df1..cf30f3f 100644 --- a/wikipediaapi/__init__.py +++ b/wikipediaapi/__init__.py @@ -177,7 +177,7 @@ def __init__( default_headers.get("User-Agent"), ) - default_headers["User-Agent"] += " (" + USER_AGENT + ")" + default_headers["User-Agent"] = used_language + " (" + USER_AGENT + ")" self.language = used_language self.variant = used_variant @@ -721,6 +721,14 @@ def _common_attributes(extract, page: "WikipediaPage"): def _check_and_correct_params( language: Optional[str], variant: Optional[str], user_agent: Optional[str] ) -> tuple[str, Optional[str], str]: + """ + Checks the constructor parameters and throws AssertionError if they are incorrect. + Otherwise, it normalises them to easy use later on. + :param language: Language mutation of Wikipedia + :param variant: Language variant + :param user_agent: HTTP User-Agent used in requests + :return: tupple of langage, variant, user_agent + """ if not user_agent or len(user_agent) < MIN_USER_AGENT_LEN: raise AssertionError( "Please, be nice to Wikipedia and specify user agent - " From 3965ea1e57a254e4ef39c0b87330213d7eeb7bb6 Mon Sep 17 00:00:00 2001 From: Martin Majlis Date: Mon, 20 Jan 2025 00:28:32 +0100 Subject: [PATCH 3/3] Fix failing tests --- wikipediaapi/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wikipediaapi/__init__.py b/wikipediaapi/__init__.py index cf30f3f..05f38fc 100644 --- a/wikipediaapi/__init__.py +++ b/wikipediaapi/__init__.py @@ -177,7 +177,7 @@ def __init__( default_headers.get("User-Agent"), ) - default_headers["User-Agent"] = used_language + " (" + USER_AGENT + ")" + default_headers["User-Agent"] = used_user_agent + " (" + USER_AGENT + ")" self.language = used_language self.variant = used_variant @@ -727,7 +727,7 @@ def _check_and_correct_params( :param language: Language mutation of Wikipedia :param variant: Language variant :param user_agent: HTTP User-Agent used in requests - :return: tupple of langage, variant, user_agent + :return: tuple of language, variant, user_agent """ if not user_agent or len(user_agent) < MIN_USER_AGENT_LEN: raise AssertionError(