Skip to content

Commit

Permalink
New recipes from Substack [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
ping committed Oct 15, 2023
1 parent 3e6775b commit 5e86ab3
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 0 deletions.
22 changes: 22 additions & 0 deletions _recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,28 @@
enable_on=first_n_days_of_month(7, -5) or last_n_days_of_month(7, -5),
tags=["technology"],
),
# Catalogue entry for the Molly White newsletter recipe
# (recipes/mollywhite-newsletter.recipe.py).
# presumably src_ext is the format built first and target_ext lists the
# formats converted from it -- TODO confirm against the Recipe definition.
# NOTE(review): the entry just above this one uses the tag "technology";
# confirm that "tech" is the intended tag here.
Recipe(
recipe="mollywhite-newsletter",
slug="mollywhite-newsletter",
src_ext="mobi",
target_ext=["epub"],
category="Blogs/Newsletters",
tags=["tech"],
# Logo image handed to the cover options (Substack CDN, 256px-wide variant).
cover_options=CoverOptions(
logo_path_or_url="https://substackcdn.com/image/fetch/w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F3b6a58dc-123a-492b-a1e2-b46138add2b9_856x856.png"
),
),
# Catalogue entry for the Nate Silver recipe (recipes/natesilver.recipe.py).
# presumably src_ext is the format built first and target_ext lists the
# formats converted from it -- TODO confirm against the Recipe definition.
Recipe(
recipe="natesilver",
slug="natesilver",
src_ext="mobi",
target_ext=["epub"],
category="Blogs/Newsletters",
tags=["commentary"],
# Logo image handed to the cover options; the same URL is used as
# masthead_url in the recipe class itself.
cover_options=CoverOptions(
logo_path_or_url="https://substackcdn.com/image/fetch/w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9798f361-e880-406c-9ed4-29229df02c27_256x256.png"
),
),
Recipe(
recipe="nature",
slug="nature",
Expand Down
56 changes: 56 additions & 0 deletions recipes/mollywhite-newsletter.recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
newsletter.mollywhite.net
"""
import os
import sys

# custom include to share code between recipes
sys.path.append(os.environ["recipes_includes"])
from recipes_shared import BasicNewsrackRecipe, format_title

from calibre.web.feeds.news import BasicNewsRecipe

# Display name of the publication: used as the recipe title, as the feed
# label, and as the name passed to format_title() for the dated title.
_name = "Molly White"


class MollyWhiteNewsletter(BasicNewsrackRecipe, BasicNewsRecipe):
    """Recipe for the Molly White newsletter feed (newsletter.mollywhite.net)."""

    # ---- identity ---------------------------------------------------------
    __author__ = "ping"
    title = _name
    language = "en"
    publication_type = "blog"
    description = "Keep up with the happenings in the tech world without all the boosterism. Cryptocurrency critic, technology researcher, and software engineer Molly White publishes a weekly explainer of the latest news and developments in the cryptocurrency industry, with summaries of the latest disasters featured on her well-known project Web3 is Going Just Great. https://newsletter.mollywhite.net/"

    # ---- fetching ---------------------------------------------------------
    # Use the article bodies shipped inside the feed and skip calibre's
    # automatic clean-up; the explicit rules below do the tidying instead.
    use_embedded_content = True
    auto_cleanup = False

    max_articles_per_feed = 30
    oldest_article = 30  # days

    feeds = [(_name, "https://newsletter.mollywhite.net/feed")]

    # ---- clean-up and styling ---------------------------------------------
    remove_attributes = ["width"]
    remove_tags = [
        {"class_": ["subscription-widget-wrap", "image-link-expand"]},
    ]

    # The CSS text is kept flush-left so the string value stays unchanged.
    extra_css = """
.captioned-image-container img {
display: block;
max-width: 100%;
height: auto;
box-sizing: border-box;
}
.captioned-image-container .image-caption { font-size: 0.8rem; margin-top: 0.2rem; }
blockquote { font-size: 1.25rem; margin-left: 0; text-align: center; }
blockquote p { margin: 0.4rem 0; }
.footnote { color: dimgray; }
.footnote .footnote-content p { margin-top: 0; }
"""

    def populate_article_metadata(self, article, __, _):
        """Keep ``pub_date`` and ``title`` in step with the newest article.

        Only ``article.utctime`` is consulted; the two remaining positional
        arguments are ignored.
        """
        newest_so_far = self.pub_date
        if newest_so_far and not (article.utctime > newest_so_far):
            # An article at least as recent has already been recorded.
            return
        self.pub_date = article.utctime
        self.title = format_title(_name, article.utctime)

    def parse_feeds(self):
        """Return the feeds as grouped by ``group_feeds_by_date``.

        The grouping helper is not defined in this class (presumably it comes
        from BasicNewsrackRecipe); it is called with a 6-hour timezone offset.
        """
        grouped = self.group_feeds_by_date(timezone_offset_hours=6)
        return grouped
67 changes: 67 additions & 0 deletions recipes/natesilver.recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
natesilver.net
"""
import os
import sys

# custom include to share code between recipes
sys.path.append(os.environ["recipes_includes"])
from recipes_shared import BasicNewsrackRecipe, format_title

from calibre.web.feeds.news import BasicNewsRecipe

# Display name of the publication: used as the recipe title, as the feed
# label, and as the name passed to format_title() for the dated title.
_name = "Nate Silver"


class NateSilver(BasicNewsrackRecipe, BasicNewsRecipe):
    """Recipe for Nate Silver's newsletter feed (www.natesilver.net)."""

    # ---- identity ---------------------------------------------------------
    __author__ = "ping"
    title = _name
    language = "en"
    publication_type = "blog"
    description = "Nate Silver is the founder and editor in chief of FiveThirtyEight. https://www.natesilver.net/"
    masthead_url = "https://substackcdn.com/image/fetch/w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9798f361-e880-406c-9ed4-29229df02c27_256x256.png"

    # ---- fetching ---------------------------------------------------------
    # Use the article bodies shipped inside the feed and skip calibre's
    # automatic clean-up; the explicit rules below do the tidying instead.
    use_embedded_content = True
    auto_cleanup = False

    max_articles_per_feed = 30
    oldest_article = 30  # days

    feeds = [(_name, "https://www.natesilver.net/feed")]

    # ---- clean-up and styling ---------------------------------------------
    remove_attributes = ["width"]
    remove_tags = [
        {
            "class_": [
                "subscription-widget-wrap",
                "image-link-expand",
                "button-wrapper",
            ]
        },
    ]

    # The CSS text is kept flush-left so the string value stays unchanged.
    extra_css = """
.captioned-image-container img {
display: block;
max-width: 100%;
height: auto;
box-sizing: border-box;
}
.captioned-image-container .image-caption { font-size: 0.8rem; margin-top: 0.2rem; }
blockquote { font-size: 1.25rem; margin-left: 0; text-align: center; }
blockquote p { margin: 0.4rem 0; }
.footnote { color: dimgray; }
.footnote .footnote-content p { margin-top: 0; }
"""

    def preprocess_html(self, soup):
        """Abort paywalled articles; hand every other page back untouched.

        A page counts as paywalled when it contains an element whose
        ``data-component-name`` attribute equals ``"Paywall"``. For such a
        page a warning naming the first ``<h1>`` is logged and
        ``abort_article`` is called with the same message.
        """
        paywall_marker = soup.find(attrs={"data-component-name": "Paywall"})
        if not paywall_marker:
            return soup

        headline = self.tag_to_string(soup.find("h1"))
        message = f'Article is paywalled: "{headline}"'
        self.log.warning(message)
        self.abort_article(message)
        return soup

    def populate_article_metadata(self, article, __, _):
        """Keep ``pub_date`` and ``title`` in step with the newest article.

        Only ``article.utctime`` is consulted; the two remaining positional
        arguments are ignored.
        """
        newest_so_far = self.pub_date
        if newest_so_far and not (article.utctime > newest_so_far):
            # An article at least as recent has already been recorded.
            return
        self.pub_date = article.utctime
        self.title = format_title(_name, article.utctime)

    def parse_feeds(self):
        """Return the feeds as grouped by ``group_feeds_by_date``.

        The grouping helper is not defined in this class (presumably it comes
        from BasicNewsrackRecipe); it is called with a 6-hour timezone offset.
        """
        grouped = self.group_feeds_by_date(timezone_offset_hours=6)
        return grouped

0 comments on commit 5e86ab3

Please sign in to comment.