Improve NYT Mag [skip ci]
ping committed Jul 23, 2023
1 parent 745354f commit e6a9270
Showing 1 changed file with 43 additions and 5 deletions.
48 changes: 43 additions & 5 deletions recipes/nytimes-magazine.recipe.py
@@ -77,10 +77,11 @@ class NYTimesBooks(NYTRecipe, BasicNewsrackRecipe, BasicNewsRecipe):

     def populate_article_metadata(self, article, soup, __):
         ts_ele = soup.find(attrs={"data-timestamp": True})
-        if ts_ele:
-            post_date = self.parse_date(ts_ele["data-timestamp"])
-            if (not self.pub_date) or post_date > self.pub_date:
-                self.pub_date = post_date
+        if not ts_ele:
+            return
+        post_date = self.parse_date(ts_ele["data-timestamp"])
+        if (not self.pub_date) or post_date > self.pub_date:
+            self.pub_date = post_date

     def parse_index(self):
         index_url = "https://www.nytimes.com/section/magazine"
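
Note on the first hunk: it is a pure guard-clause refactor of populate_article_metadata; the newest data-timestamp seen across articles still becomes the feed's pub_date. A minimal standalone sketch of the same bookkeeping, with BeautifulSoup and dateutil standing in for calibre's soup handling and parse_date helper (the function name and sample HTML are illustrative, not from the recipe):

# Sketch only: mirrors the early-return logic of the hunk above.
from bs4 import BeautifulSoup
from dateutil import parser as dateparser

def newest_timestamp(html, current=None):
    soup = BeautifulSoup(html, "html.parser")
    ts_ele = soup.find(attrs={"data-timestamp": True})
    if not ts_ele:  # guard clause: return early instead of nesting the happy path
        return current
    post_date = dateparser.parse(ts_ele["data-timestamp"])
    return post_date if (not current) or post_date > current else current

html = '<time data-timestamp="2023-07-23T10:00:00Z">July 23</time>'
print(newest_timestamp(html))  # 2023-07-23 10:00:00+00:00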
@@ -94,8 +95,45 @@ def parse_index(self):
             self.cover_url = issue_cover["src"]
         issue_url = urljoin(index_url, issue_link["href"])
         soup = self.index_to_soup(issue_url)
-        self.title = f'{_name}: {self.tag_to_string(soup.find("h1"))}'
+        info = self.get_script_json(soup, r"window.__preloadedData\s*=\s*")
+        if info and info.get("initialState"):
+            content_service = info.get("initialState")
+            for k, v in content_service["ROOT_QUERY"].items():
+                if not (
+                    k.startswith("workOrLocation")
+                    and v
+                    and v["typename"] == "LegacyCollection"
+                ):
+                    continue
+                content_node_id = v["id"]
+                break
+            issue_info = content_service.get(content_node_id)
+            self.pub_date = self.parse_date(
+                issue_info.get("lastModified") or issue_info["firstPublished"]
+            )
+            self.title = f'{_name}: {issue_info["name"]}'
+            articles = []
+            for v in content_service.values():
+                if v.get("__typename", "") != "Article":
+                    continue
+                try:
+                    articles.append(
+                        {
+                            "url": v["url"],
+                            "title": content_service.get(
+                                v.get("headline", {}).get("id", "")
+                            ).get("default"),
+                            "description": v.get("summary", ""),
+                            "date": self.parse_date(v["lastMajorModification"]),
+                        }
+                    )
+                except Exception as err:
+                    self.log.warning("Error extracting article: %s" % err)
+
+            if articles:
+                return [("Articles", articles)]
+
         self.title = f'{_name}: {self.tag_to_string(soup.find("h1"))}'
         articles = []
         for article in soup.find_all("article"):
             articles.append(
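The second hunk teaches parse_index to prefer the issue metadata embedded in the page's window.__preloadedData state (title, pub date, and article list all come from that normalized cache), falling through to the old h1/article scraping only when the state is missing or yields no articles. A rough standalone sketch of the traversal, run against a hand-rolled cache whose shape is inferred from the keys the recipe reads (workOrLocation*, LegacyCollection, Article, headline.id) rather than from any documented NYT schema:

# Hypothetical stand-in for info["initialState"]; sketch only.
content_service = {
    "ROOT_QUERY": {
        "workOrLocation(...)": {
            "typename": "LegacyCollection",
            "id": "LegacyCollection:abc",
        },
    },
    "LegacyCollection:abc": {
        "name": "The Example Issue",
        "firstPublished": "2023-07-23T10:00:00Z",
        "lastModified": None,
    },
    "Headline:h1": {"default": "An Example Headline"},
    "Article:a1": {
        "__typename": "Article",
        "url": "https://www.nytimes.com/2023/07/23/magazine/example.html",
        "headline": {"id": "Headline:h1"},
        "summary": "An example summary.",
        "lastMajorModification": "2023-07-23T09:00:00Z",
    },
}

# Locate the collection node through ROOT_QUERY, as the recipe does.
collection_id = next(
    v["id"]
    for k, v in content_service["ROOT_QUERY"].items()
    if k.startswith("workOrLocation") and v and v["typename"] == "LegacyCollection"
)
issue = content_service[collection_id]
print(issue["lastModified"] or issue["firstPublished"])  # pub date source

# Every normalized Article entry in the cache becomes one feed item;
# headlines are themselves normalized, so the title needs a second lookup.
articles = [
    {
        "url": v["url"],
        "title": content_service[v["headline"]["id"]]["default"],
        "description": v.get("summary", ""),
        "date": v["lastMajorModification"],
    }
    for v in content_service.values()
    if v.get("__typename") == "Article"
]
print(articles[0]["title"])  # An Example Headline

Keeping the old scraping path below the new block means a change in the embedded state degrades gracefully to the previous behavior instead of breaking the recipe outright.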
