forked from ping/newsrack
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathasian-review.recipe.py
80 lines (62 loc) · 2.92 KB
/
asian-review.recipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
# -*- mode: python -*-
# -*- coding: utf-8 -*-
"""
asianreviewofbooks.com
"""
# Original from https://github.com/kovidgoyal/calibre/blob/29cd8d64ea71595da8afdaec9b44e7100bff829a/recipes/asianreviewofbooks.recipe
__license__ = "GPL v3"
__copyright__ = "2012-2017, Darko Miletic <darko.miletic at gmail.com>"
import os
import sys
# custom include to share code between recipes
sys.path.append(os.environ["recipes_includes"])
from recipes_shared import BasicNewsrackRecipe, format_title
from calibre.web.feeds.news import BasicNewsRecipe
# Base display name of the publication. The recipe class uses it as its
# initial ``title`` and passes it to ``format_title`` when rebuilding the title.
_name = "Asian Review of Books"
class AsianReviewOfBooks(BasicNewsrackRecipe, BasicNewsRecipe):
    """Recipe for the Asian Review of Books (asianreviewofbooks.com).

    Articles are taken from the site's RSS feed. Each article page is
    reduced to its ``<main>`` element, sharing/related-post widgets are
    stripped, and a few heading tags the site uses for pull quotes and
    author bios are rewritten into more appropriate elements.
    """

    title = _name
    __author__ = "Darko Miletic"
    description = "In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication. https://asianreviewofbooks.com/"  # noqa
    publisher = "The Asian Review of Books"
    category = "literature, books, reviews, Asia"
    language = "en"
    publication_type = "magazine"
    masthead_url = "https://i2.wp.com/asianreviewofbooks.com/content/wp-content/uploads/2016/09/ARBwidelogo.png"

    # Feed limits: article age cut-off and per-feed article cap.
    oldest_article = 30
    max_articles_per_feed = 30

    # Metadata handed to the conversion step, mirroring the attributes above.
    conversion_options = {
        "comment": description,
        "tags": category,
        "publisher": publisher,
        "language": language,
    }

    # Drop fixed dimensions so images are not pinned to their web sizes.
    remove_attributes = ["width", "height"]

    # Keep only the <main> element of each page.
    keep_only_tags = [
        dict(name="main"),
    ]

    # Post metadata, share buttons, related-post blocks and the entry footer.
    remove_tags = [
        dict(class_=["entry-meta", "sharedaddy", "jp-relatedposts", "entry-footer"])
    ]

    extra_css = """
    blockquote { font-size: 1.2rem; margin-left: 0; font-style: italic; }
    .wp-caption-text, .entry-featured__caption { display: block; font-size: 0.8rem; margin-top: 0.2rem; }
    """

    feeds = [("Articles", "http://asianreviewofbooks.com/content/feed/")]

    def populate_article_metadata(self, article, soup, _):
        """Track the newest article timestamp and refresh the recipe title.

        Whenever ``article.utctime`` is later than the stored
        ``self.pub_date`` (or none is stored yet), ``self.pub_date`` is
        updated and ``self.title`` is rebuilt via ``format_title``.

        NOTE(review): ``self.pub_date`` is presumably initialised by
        ``BasicNewsrackRecipe``; it is not defined in this class - confirm.

        :param article: feed article; only its ``utctime`` is read here.
        :param soup: parsed article HTML (unused).
        :param _: third positional argument supplied by the caller (unused).
        """
        if not self.pub_date or self.pub_date < article.utctime:
            self.pub_date = article.utctime
            self.title = format_title(_name, self.pub_date)

    def preprocess_html(self, soup):
        """Tidy the article markup before conversion and return the soup.

        * Removes ``<p>`` elements that have neither text nor embedded
          media.
        * Renames ``<h5>`` to ``<blockquote>`` and ``<h6>`` to ``<div>``.

        :param soup: parsed article HTML; modified in place.
        :returns: the same ``soup`` object.
        """
        # Elements that are real content even though they contribute no text.
        # A text-only emptiness check would otherwise delete paragraphs such
        # as <p><img ...></p> and silently drop inline images.
        media_tags = ["img", "picture", "svg", "video", "audio", "iframe", "object", "embed"]

        for para in soup.find_all("p"):
            if para.text.strip():
                continue
            if para.find(media_tags) is not None:
                continue
            para.decompose()

        # Same tag renames as before: every <h5> becomes a <blockquote>
        # (styled by extra_css) and every <h6> becomes a plain <div>.
        for quote in soup.find_all("h5"):
            quote.name = "blockquote"
        for bio in soup.find_all("h6"):
            bio.name = "div"

        return soup