Skip to content

Commit

Permalink
Improve Nautilus [skip ci]
Browse files — browse the repository at this point in the history
  • Loading branch information
ping committed Jun 18, 2023
1 parent b35e79a commit 7204f0a
Showing 1 changed file with 11 additions and 36 deletions.
47 changes: 11 additions & 36 deletions recipes/nautilus.recipe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,32 +52,9 @@ class Nautilus(BasicNewsrackRecipe, BasicNewsRecipe):
div.wp-block-image div { font-size: 0.8rem; }
blockquote.wp-block-quote { font-size: 1.25rem; margin-left: 0; text-align: center; }
div.feature-image img, div.wp-block-image img { display: block; max-width: 100%; height: auto; }
.article-author { margin-top: 2rem; border-top: solid 1px; padding-top: 0.5rem; font-style: italic; }
"""

feeds = [
("Anthropology", "https://nautil.us/topics/anthropology/feed/"),
("Arts", "https://nautil.us/topics/arts/feed/"),
("Astronomy", "https://nautil.us/topics/astronomy/feed/"),
("Communication", "https://nautil.us/topics/communication/feed/"),
("Economics", "https://nautil.us/topics/economics/feed/"),
("Environment", "https://nautil.us/topics/environment/feed/"),
("Evolution", "https://nautil.us/topics/evolution/feed/"),
("Genetics", "https://nautil.us/topics/genetics/feed/"),
("Geoscience", "https://nautil.us/topics/geoscience/feed/"),
("Health", "https://nautil.us/topics/health/feed/"),
("History", "https://nautil.us/topics/history/feed/"),
("Math", "https://nautil.us/topics/math/feed/"),
("Microbiology", "https://nautil.us/topics/microbiology/feed/"),
("Neuroscience", "https://nautil.us/topics/neuroscience/feed/"),
("Paleontology", "https://nautil.us/topics/paleontology/feed/"),
("Philosophy", "https://nautil.us/topics/philosophy/feed/"),
("Physics", "https://nautil.us/topics/physics/feed/"),
("Psychology", "https://nautil.us/topics/psychology/feed/"),
("Sociology", "https://nautil.us/topics/sociology/feed/"),
("Technology", "https://nautil.us/topics/technology/feed/"),
("Zoology", "https://nautil.us/topics/zoology/feed/"),
]

def get_feeds(self):
soup = self.index_to_soup("https://nautil.us/")
topics = soup.find_all(
Expand All @@ -94,17 +71,6 @@ def populate_article_metadata(self, article, __, _):
self.pub_date = article.utctime
self.title = format_title(_name, article.utctime)

# def get_cover_url(self):
# soup = self.index_to_soup("https://www.presspassnow.com/nautilus/issues/")
# div = soup.find("div", **classes("image-fade_in_back"))
# if div:
# self.cover_url = (
# div.find("img", attrs={"srcset": True})["srcset"]
# .split(",")[-1]
# .split()[0]
# )
# return getattr(self, "cover_url", self.cover_url)

def preprocess_html(self, soup):
breadcrumb = soup.find("ul", attrs={"class": "breadcrumb"})
if breadcrumb:
Expand All @@ -128,6 +94,15 @@ def preprocess_html(self, soup):
if len(p.get_text(strip=True)) == 0:
p.decompose()

for img in soup.findAll("img", attrs={"data-src": True}):
for img in soup.find_all("img", attrs={"data-src": True}):
img["src"] = img["data-src"].split("?")[0]

# convert author ul/li
for ul in soup.find_all("ul", class_="article-author"):
for li in ul.find_all("li", class_="article-author-box"):
for p in li.find_all("p"):
p.name = "div"
li.name = "div"
ul.name = "div"

return soup

0 comments on commit 7204f0a

Please sign in to comment.