From 491b7ce0689ef36b57967a1fdc721a17a26a2f2d Mon Sep 17 00:00:00 2001 From: ping Date: Sun, 15 Oct 2023 17:18:43 +0800 Subject: [PATCH] Improve substack recipes [skip ci] --- recipes/mollywhite-newsletter.recipe.py | 24 ++++++++++++++++++++++-- recipes/natesilver.recipe.py | 24 ++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/recipes/mollywhite-newsletter.recipe.py b/recipes/mollywhite-newsletter.recipe.py index 1d81e5d0..0487e633 100644 --- a/recipes/mollywhite-newsletter.recipe.py +++ b/recipes/mollywhite-newsletter.recipe.py @@ -3,10 +3,11 @@ """ import os import sys +from datetime import timezone, timedelta # custom include to share code between recipes sys.path.append(os.environ["recipes_includes"]) -from recipes_shared import BasicNewsrackRecipe, format_title +from recipes_shared import BasicNewsrackRecipe, format_title, get_date_format from calibre.web.feeds.news import BasicNewsRecipe @@ -25,10 +26,13 @@ class MollyWhiteNewsletter(BasicNewsrackRecipe, BasicNewsRecipe): oldest_article = 30 # days max_articles_per_feed = 30 + keep_only_tags = [dict(name="article")] remove_tags = [dict(class_=["subscription-widget-wrap", "image-link-expand"])] remove_attributes = ["width"] extra_css = """ + .article-meta { margin-top: 1rem; margin-bottom: 1rem; } + .article-meta .author { font-weight: bold; color: #444; margin-right: 0.5rem; } .captioned-image-container img { display: block; max-width: 100%; @@ -53,4 +57,20 @@ def populate_article_metadata(self, article, __, _): self.title = format_title(_name, article.utctime) def parse_feeds(self): - return self.group_feeds_by_date(timezone_offset_hours=6) + timezone_offset_hours = -6 + feeds = self.group_feeds_by_date(timezone_offset_hours=timezone_offset_hours) + for feed in feeds: + for article in feed.articles: + # inject title and pub date + date_published = article.utctime.replace(tzinfo=timezone.utc) + date_published_loc = date_published.astimezone( + timezone(offset=timedelta(hours=timezone_offset_hours)) + ) + article_soup = self.soup( + f'

{article.title}

{article.content}
" + ) + article.content = str(article_soup) + return feeds diff --git a/recipes/natesilver.recipe.py b/recipes/natesilver.recipe.py index 6cecb92f..f05bc254 100644 --- a/recipes/natesilver.recipe.py +++ b/recipes/natesilver.recipe.py @@ -3,10 +3,11 @@ """ import os import sys +from datetime import timezone, timedelta # custom include to share code between recipes sys.path.append(os.environ["recipes_includes"]) -from recipes_shared import BasicNewsrackRecipe, format_title +from recipes_shared import BasicNewsrackRecipe, format_title, get_date_format from calibre.web.feeds.news import BasicNewsRecipe @@ -26,12 +27,15 @@ class NateSilver(BasicNewsrackRecipe, BasicNewsRecipe): oldest_article = 30 # days max_articles_per_feed = 30 + keep_only_tags = [dict(name="article")] remove_tags = [ dict(class_=["subscription-widget-wrap", "image-link-expand", "button-wrapper"]) ] remove_attributes = ["width"] extra_css = """ + .article-meta { margin-top: 1rem; margin-bottom: 1rem; } + .article-meta .author { font-weight: bold; color: #444; margin-right: 0.5rem; } .captioned-image-container img { display: block; max-width: 100%; @@ -64,4 +68,20 @@ def populate_article_metadata(self, article, __, _): self.title = format_title(_name, article.utctime) def parse_feeds(self): - return self.group_feeds_by_date(timezone_offset_hours=6) + timezone_offset_hours = -6 + feeds = self.group_feeds_by_date(timezone_offset_hours=timezone_offset_hours) + for feed in feeds: + for article in feed.articles: + # inject title and pub date + date_published = article.utctime.replace(tzinfo=timezone.utc) + date_published_loc = date_published.astimezone( + timezone(offset=timedelta(hours=timezone_offset_hours)) + ) + article_soup = self.soup( + f'

{article.title}

{article.content}
" + ) + article.content = str(article_soup) + return feeds