-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMRS-PDFbot.config
80 lines (66 loc) · 3.94 KB
/
MRS-PDFbot.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# MRS.PDFbot CONFIGURATION FILE
# ----------------------------------------------------------------------------------------
# wget2 support: set to True to use wget2; set to False to use standard wget.
# ----------------------------------------------------------------------------------------
wget2 = True
# ----------------------------------------------------------------------------------------
# General settings:
# URL_List           The file where the list of input URLs is kept
# startpage          A single URL at which to start. It is added to any given URL list.
#                    NOTE(review): the shipped value is a pair of double quotes;
#                    presumably this means "no start page" - confirm how the script
#                    treats quoted/empty values.
# use reject list?   Whether to reject links/websites specified in a text file
#                    containing a list of URLs to reject
# reject file        The text file containing the list of URLs to reject
# ----------------------------------------------------------------------------------------
URL_List = url_file.txt
startpage = ""
use reject list? = True
reject file = reject-list.txt
# ----------------------------------------------------------------------------------------
# Filetypes. Whilst this is designed for PDFs, feel free to modify this. Just make sure
# the filetype matches up with the mediatype on archive.org, otherwise we will all be
# exceptionally sad.
# images (jpeg, jpg, tif, png, gif...)    mediatype: image
# documents (pdf, txt...)                 mediatype: texts
# ----------------------------------------------------------------------------------------
filetype = pdf
mediatype = texts
# ----------------------------------------------------------------------------------------
# The wget quota is given in bytes (b), kilobytes (k) or megabytes (m). This is NOT the
# total size of all PDFs, but the maximum amount of data downloaded. For example, if
# a 1 MB HTML file is downloaded (to find links) and then rejected because it is not a
# PDF file, that 1 MB still counts towards your quota.
# user-agent: NOTE(review): undocumented here; presumably the User-Agent string passed
# to wget - confirm against the script.
# ----------------------------------------------------------------------------------------
quotastring = 5000m
user-agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
# ----------------------------------------------------------------------------------------
# archive.org name or names to use in the "Contributor" field. Not required by default.
# If you use it, remember that none of the names are hyperlinked on the Archive, so
# treat it as a plain string - i.e. you don't have to put a comma between multiple
# names; doing so is purely a stylistic choice.
# ----------------------------------------------------------------------------------------
contributor = "MRS.PDFbot"
# ----------------------------------------------------------------------------------------
# confirm_uploads        checks before uploading to archive.org by providing a total
#                        file number. By default MRS.PDFbot just uploads without asking
# clean_up               the clean_up function removes empty directories and moves PDFs
#                        into a single folder. Default is "Y"(es)
# uploaded_pdfs_folder   the folder into which PDFs will be moved. If left blank, a
#                        default is used (NOTE(review): the default is not stated
#                        here - confirm against the script)
# local upload record?   True or False; whether to keep a record of uploads
# upload record file     the file (txt) where a record of uploaded files is kept. Very
#                        useful to reduce bandwidth and time wasted re-uploading
#                        duplicates
# ----------------------------------------------------------------------------------------
confirm_uploads = N
clean_up = Y
uploaded_pdfs_folder = uploaded_PDFs
local upload record? = True
upload record file = upload_record.txt
# ----------------------------------------------------------------------------------------
# PDF archive settings:
# pdf_archive_dir:   This is the directory where PDFs are moved to after upload to
#                    the Internet Archive (ia). Default = "uploaded_PDFs". Do not
#                    include " or '.
#                    NOTE(review): looks like it overlaps with uploaded_pdfs_folder,
#                    which has the same value - confirm which one the script reads.
# use_pdftk:         True or False. NOTE(review): undocumented here; presumably
#                    controls whether the pdftk tool is used - confirm against the
#                    script.
# ----------------------------------------------------------------------------------------
pdf_archive_dir = uploaded_PDFs
use_pdftk = True