-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget.js
112 lines (87 loc) · 2.74 KB
/
get.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
const cheerio = require("cheerio");
const request = require("request");
const fs = require("fs");
/**
 * Common base class for the site-specific chapter parsers in this file.
 *
 * It declares no members of its own; the subclasses supply
 * get_chapter_title, get_chapter_text and get_next_url.
 */
class Parser {}
/**
 * Parser for pages whose chapter lives inside an <article> element, with the
 * heading marked ".entry-title" and a link labelled "Next Chapter".
 *
 * Every method receives the Cheerio selection built from the downloaded page.
 */
class SLTParser extends Parser {
  /**
   * @param {object} body - Cheerio selection for the page.
   * @returns {string} Text content of the ".entry-title" element(s).
   */
  get_chapter_title(body) {
    const heading = body.find(".entry-title");
    return heading.text();
  }

  /**
   * @param {object} body - Cheerio selection for the page.
   * @returns {string|null} Inner HTML of the article with nested
   *   "div > div" elements removed.
   */
  get_chapter_text(body) {
    // Operate on a clone so the removal below does not modify `body`.
    const content = body.find("article").clone();
    const nested_divs = content.find("div > div");
    nested_divs.remove();
    return content.html();
  }

  /**
   * @param {object} body - Cheerio selection for the page.
   * @returns {string|undefined} The href of the link containing the text
   *   "Next Chapter", or undefined when the attribute is absent.
   */
  get_next_url(body) {
    return body.find("a:contains(Next Chapter)").attr("href");
  }
}
/**
 * Parser for chapter pages served from https://www.wuxiaworld.com.
 *
 * Every method receives the Cheerio selection built from the downloaded page.
 */
class WuxiaParser extends Parser {
  /**
   * @param {object} body - Cheerio selection for the page.
   * @returns {string} Text of the ".caption h4" heading that contains "Chapter".
   */
  get_chapter_title(body) {
    return body.find(".caption h4:contains(Chapter)").text();
  }

  /**
   * @param {object} body - Cheerio selection for the page.
   * @returns {string|null} Inner HTML of the ".p-15 .fr-view" element.
   */
  get_chapter_text(body) {
    return body.find(".p-15 .fr-view").html();
  }

  /**
   * Resolve the "next chapter" link to an absolute URL.
   *
   * @param {object} body - Cheerio selection for the page.
   * @returns {string|undefined} Absolute URL of the next chapter, or
   *   undefined when the link is missing, is the "#" placeholder, or cannot
   *   be parsed as a URL.
   */
  get_next_url(body) {
    const base_url = "https://www.wuxiaworld.com";
    const href = body.find(".next > a").attr("href");

    // A missing link yields undefined from attr(); it must not be glued onto
    // the base URL as the literal text "undefined".
    if (!href || href === "#") return undefined;

    try {
      // URL resolution handles root-relative, relative and absolute hrefs.
      return new URL(href, base_url).href;
    } catch (err) {
      // An unparseable link is treated as "no next chapter" so that a
      // malformed page ends the crawl instead of throwing from the parser.
      return undefined;
    }
  }
}
/**
 * Download one chapter page and extract its parts with `parser`.
 *
 * @param {object} parser - Object providing get_chapter_title(body),
 *   get_chapter_text(body) and get_next_url(body).
 * @param {string} url - Address of the chapter page.
 * @returns {Promise<{title: *, text: *, next: *}>} Resolves with whatever the
 *   three parser methods return. Rejects when the request fails, when the
 *   response status is not 2xx, or when parsing throws.
 */
function get_chapter(parser, url) {
  return new Promise((resolve, reject) => {
    request.get(url, (err, res, html) => {
      if (err) return reject(err);

      // An error page would otherwise be parsed as if it were a chapter.
      if (res && (res.statusCode < 200 || res.statusCode >= 300)) {
        return reject(
          new Error(`Request for ${url} failed with HTTP status ${res.statusCode}`),
        );
      }

      // This callback runs outside the Promise executor, so anything thrown
      // here has to be forwarded to reject() by hand; otherwise it surfaces
      // as an uncaught exception.
      try {
        const body = cheerio.load(html).root();
        resolve({
          title: parser.get_chapter_title(body),
          text: parser.get_chapter_text(body),
          next: parser.get_next_url(body),
        });
      } catch (parse_err) {
        reject(parse_err);
      }
    });
  });
}
/**
 * Collects chapters into a single HTML document with an upper size limit.
 *
 * Sizes are measured with String.prototype.length, i.e. in UTF-16 code units
 * of the accumulated HTML, not in bytes on disk.
 */
class Book {
  /**
   * @param {number} [max_size=2 * 1024 * 1024] - Largest body length (in
   *   string length units) the book may reach before chapters are refused.
   */
  constructor(max_size = 2 * 1024 * 1024) {
    this.chapters = [];
    this.body = "";
    this.max_size = max_size;
  }

  /**
   * @returns {number} Current body length as a fraction of the size limit.
   */
  get_progress() {
    return this.body.length / this.max_size;
  }

  /**
   * Append a chapter unless doing so would exceed the size limit.
   *
   * @param {{title: string, text: string}} chapter - Chapter to append.
   * @returns {boolean} true when the chapter was added, false when the book
   *   is too full to take it.
   * @throws {TypeError} When the chapter has no text.
   */
  add_chapter(chapter) {
    if (chapter.text == null) {
      // A null/undefined text would otherwise fail with an opaque
      // "cannot read properties of null" error on the length check below.
      throw new TypeError(`Chapter "${chapter.title}" has no text`);
    }
    if (this.body.length + chapter.text.length > this.max_size) return false;

    this.chapters.push(chapter);
    this.body += chapter.text + "<hr/>";
    return true;
  }

  /**
   * Write the accumulated HTML to "books/<first title> to <last title>.html".
   *
   * The "books" directory is created when missing, and characters that are
   * path separators or invalid in file names on common platforms are
   * replaced with "_" so a title cannot redirect or break the write.
   *
   * @returns {string} Relative path of the file that was written.
   * @throws {Error} When the book contains no chapters.
   */
  save_to_file() {
    if (this.chapters.length === 0) {
      throw new Error("Cannot save an empty book: no chapters were added");
    }

    const first_title = this.chapters[0].title;
    const last_title = this.chapters[this.chapters.length - 1].title;
    const file_name = `${first_title} to ${last_title}.html`
      .replace(/[\\\/:*?"<>|\u0000-\u001f]/g, "_");

    fs.mkdirSync("books", { recursive: true });
    const file_path = `books/${file_name}`;
    fs.writeFileSync(file_path, this.body);
    return file_path;
  }
}
/**
 * Follow "next chapter" links starting at `url`, adding every downloaded
 * chapter to a book.
 *
 * The crawl ends when a chapter has no next link, when the book refuses a
 * chapter because it is full, or when the next link points at a page that
 * was already downloaded during this call (which would otherwise repeat the
 * same chapters until the book fills up).
 *
 * Implemented as a loop rather than by self-recursion so that a long series
 * does not build a chain of nested pending promises.
 *
 * @param {object} parser - Parser handed through to get_chapter.
 * @param {string} url - Address of the first chapter to download.
 * @param {Book} [book] - Book to fill; a new Book is created when omitted.
 * @returns {Promise<Book>} Resolves with the book once the crawl ends;
 *   rejects with the error of the first failed download.
 */
async function get_chapter_recursive(parser, url, book) {
  const target = book || new Book();
  const visited = new Set();
  let current_url = url;

  while (true) {
    visited.add(current_url);
    const chapter = await get_chapter(parser, current_url);

    // Progress is reported before the new chapter is added.
    console.log(`Found Chapter "${chapter.title}", has next ${!!chapter.next}. Book ${
      Math.floor(target.get_progress() * 1000) / 10
    }% full.`);

    if (!target.add_chapter(chapter)) {
      console.log("Book too big, stopping.");
      return target;
    }
    if (!chapter.next) return target;
    if (visited.has(chapter.next)) {
      console.log("Next chapter was already downloaded, stopping.");
      return target;
    }

    current_url = chapter.next;
  }
}
// Entry point: crawl a series starting at the URL given as the first
// command-line argument and write the collected chapters to disk.
const url = process.argv[2] || "";

if (!url) {
  console.error("Usage: node get.js <first-chapter-url>");
  process.exitCode = 1;
} else {
  get_chapter_recursive(new WuxiaParser(), url)
    .then(book => {
      book.save_to_file();
    })
    .catch(err => {
      // Without this handler a failed download or write would surface only
      // as an unhandled promise rejection.
      console.error("Failed to build the book:", err);
      process.exitCode = 1;
    });
}