From 859c07819f8caeac296cd31553dc7e396e7d0028 Mon Sep 17 00:00:00 2001 From: Ross Williams Date: Thu, 30 Jan 2025 13:51:57 +0000 Subject: [PATCH 1/4] Invert attempt limit check It might never have worked! --- notifier/wikidot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notifier/wikidot.py b/notifier/wikidot.py index 872162f..d40ee75 100644 --- a/notifier/wikidot.py +++ b/notifier/wikidot.py @@ -154,7 +154,7 @@ def module( cookies={"wikidot_token7": token7}, ) except ConnectionError as error: - will_retry = attempt_count > self.MODULE_ATTEMPT_LIMIT + will_retry = attempt_count < self.MODULE_ATTEMPT_LIMIT logger.debug( "Module connection failed %s", { @@ -189,7 +189,7 @@ def module( and response["message"] == "An error occurred while processing the request." ): - will_retry = attempt_count > self.MODULE_ATTEMPT_LIMIT + will_retry = attempt_count < self.MODULE_ATTEMPT_LIMIT if will_retry: logger.warning( "Wikidot internal failure, retrying in 10 seconds %s", From 5e1e5757aff7d62416ddd20973a71082d5535fd3 Mon Sep 17 00:00:00 2001 From: Ross Williams Date: Thu, 30 Jan 2025 13:52:17 +0000 Subject: [PATCH 2/4] Retry page get on bork I have not yet reproduced a bork so I don't know how to detect one just yet. 
--- notifier/wikidot.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/notifier/wikidot.py b/notifier/wikidot.py index d40ee75..6560c5a 100644 --- a/notifier/wikidot.py +++ b/notifier/wikidot.py @@ -454,7 +454,29 @@ def get_page_id(self, wiki_id: str, slug: str) -> int: "s" if wiki["secure"] else "", wiki_id, slug ) ) - page = self._session.get(page_url).text + + page = None + for attempt_count in range(self.MODULE_ATTEMPT_LIMIT): + attempt_delay = 2**attempt_count * self.PAGINATION_DELAY_S + will_retry = attempt_count < self.MODULE_ATTEMPT_LIMIT + time.sleep(attempt_delay) + page = self._session.get(page_url).text + + if page.is_wikidot_error(): + logger.warning( + "Wikibork when getting page %s", + { + "url": page_url, + "attempt_number": attempt_count + 1, + "attempt_delay_s": attempt_delay, + "max_attempts": self.MODULE_ATTEMPT_LIMIT, + "will_retry": will_retry, + }, + ) + if not will_retry: + raise Wikibork + assert page is not None + return int( cast(Match[str], re.search(r"pageId = ([0-9]+);", page)).group(1) ) From cdac35f85894c907ffee7acb7138411c4b74abc3 Mon Sep 17 00:00:00 2001 From: Ross Williams Date: Tue, 4 Feb 2025 13:52:47 +0000 Subject: [PATCH 3/4] Check status code before getting page ID --- notifier/wikidot.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/notifier/wikidot.py b/notifier/wikidot.py index 6560c5a..d386b83 100644 --- a/notifier/wikidot.py +++ b/notifier/wikidot.py @@ -455,14 +455,14 @@ def get_page_id(self, wiki_id: str, slug: str) -> int: ) ) - page = None + page_text = None for attempt_count in range(self.MODULE_ATTEMPT_LIMIT): attempt_delay = 2**attempt_count * self.PAGINATION_DELAY_S will_retry = attempt_count < self.MODULE_ATTEMPT_LIMIT time.sleep(attempt_delay) - page = self._session.get(page_url).text + response = self._session.get(page_url) - if page.is_wikidot_error(): + if response.status_code == 500: logger.warning( "Wikibork 
when getting page %s", { @@ -473,12 +473,33 @@ def get_page_id(self, wiki_id: str, slug: str) -> int: "will_retry": will_retry, }, ) - if not will_retry: + if will_retry: + continue raise Wikibork - assert page is not None + + if response.status_code != 200: + logger.warning( + "Failed to get page %s", + { + "url": page_url, + "status_code": response.status_code, + "attempt_number": attempt_count + 1, + "attempt_delay_s": attempt_delay, + "max_attempts": self.MODULE_ATTEMPT_LIMIT, + "will_retry": will_retry, + }, + ) + if will_retry: + continue + raise OngoingConnectionError + + page_text = response.text + assert page_text is not None return int( - cast(Match[str], re.search(r"pageId = ([0-9]+);", page)).group(1) + cast( + Match[str], re.search(r"pageId = ([0-9]+);", page_text) + ).group(1) ) def rename_page(self, wiki_id: str, from_slug: str, to_slug: str) -> None: From 43ae4d9c762ed26907f9d40a7f568a863bb41729 Mon Sep 17 00:00:00 2001 From: Ross Williams Date: Tue, 4 Feb 2025 13:53:20 +0000 Subject: [PATCH 4/4] Don't crash on missing post That's not something that I can control, but I'd still like to be notified about it for now --- notifier/newposts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/notifier/newposts.py b/notifier/newposts.py index 4f26f55..f7c087d 100644 --- a/notifier/newposts.py +++ b/notifier/newposts.py @@ -130,7 +130,6 @@ def fetch_posts_with_context( "post_id": post_id, }, ) - raise RuntimeError("Requested post missing from downloaded thread") # For each kind of context, check if we already have it, and if not, fetch it