# DirDownld.py
from os import makedirs
from os.path import basename
from os.path import join
from pathlib import Path
from urllib.request import urlopen
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import time
graphicart = """
-------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------
______ ___________ _____ _____ _____ _____________ __ ______ _____ _ _ _ _ _ _____ ___ ______ ___________
| _ \_ _| ___ \ ___/ __ \_ _| _ | ___ \ \ / / | _ \ _ || | | | \ | || | | _ |/ _ \| _ \ ___| ___ |
| | | | | | | |_/ / |__ | / \/ | | | | | | |_/ /\ V / | | | | | | || | | | \| || | | | | / /_\ \ | | | |__ | |_/ /
| | | | | | | /| __|| | | | | | | | / \ / | | | | | | || |/\| | . ` || | | | | | _ | | | | __|| /
| |/ / _| |_| |\ \| |___| \__/\ | | \ \_/ / |\ \ | | | |/ /\ \_/ /\ /\ / |\ || |___\ \_/ / | | | |/ /| |___| |\ \
|___/ \___/\_| \_\____/ \____/ \_/ \___/\_| \_| \_/ |___/ \___/ \/ \/\_| \_/\_____/\___/\_| |_/___/ \____/\_| \_|
-------------------------------------------------------------------------------------------------------------------------
M@d3 With ♥ -- VirusZzWarning
Read My Blogs : https://viruszzwarning.medium.com/
-------------------------------------------------------------------------------------------------------------------------
|| This tool will crawl through the link and download any type of file found ||
-------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------
"""
for x in graphicart.splitlines():
    print(x)
    time.sleep(0.5)
# load a file from a URL, returns content of downloaded file
def download_url(urlpath):
    # open a connection to the server
    with urlopen(urlpath) as connection:
        # read the contents of the url as bytes and return it
        return connection.read()
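
# A quick usage sketch (hypothetical URL, assuming the server returns a plain
# directory listing): page_bytes = download_url('https://example.com/files/')
# The raw bytes are later decoded and parsed by get_urls_from_html().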
# decode downloaded html and extract all <a href=""> links
def get_urls_from_html(content):
    # decode the provided content as utf-8 text
    html = content.decode('utf-8')
    # parse the document as best we can
    soup = BeautifulSoup(html, 'html.parser')
    # find all of the <a href=""> tags in the document
    atags = soup.find_all('a')
    # get all href values (links) or None if not present (unlikely)
    return [t.get('href', None) for t in atags]
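
# For example, markup like b'<a href="a.txt">a</a><a>anchor with no href</a>'
# (hypothetical input) would return ['a.txt', None]; the None entries are
# skipped later by download_url_to_file().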
# save provided content to the local path
def save_file(path, data):
    # open the local file for writing
    with open(path, 'wb') as file:
        # write all provided data to the file
        file.write(data)
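
# e.g. save_file('/tmp/example.bin', b'payload') writes the bytes verbatim;
# the path here is purely illustrative of the binary ('wb') write mode.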
# download one file to a local directory
def download_url_to_file(url, link, path):
    # skip bad urls or bad filenames
    if link is None or link == '../':
        return (link, None)
    # check for no file extension (guard short links before indexing)
    if len(link) < 4 or not (link[-4] == '.' or link[-3] == '.'):
        return (link, None)
    # convert relative link to absolute link
    absurl = urljoin(url, link)
    # download the content of the file
    data = download_url(absurl)
    # get the filename
    filename = basename(absurl)
    # construct the output path
    outpath = join(path, filename)
    # save to file
    save_file(outpath, data)
    # return results
    return (link, outpath)
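
# Sketch of the skip logic above (hypothetical inputs, base = any listing URL):
#   download_url_to_file(base, '../', path)     -> ('../', None)      parent dir
#   download_url_to_file(base, 'subdir/', path) -> ('subdir/', None)  no extension
#   download_url_to_file(base, 'a.txt', path)   -> ('a.txt', join(path, 'a.txt'))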
# download all files on the provided webpage to the provided path
def download_all_files(url, path):
    # download the html webpage
    data = download_url(url)
    # create a local directory to save files
    makedirs(path, exist_ok=True)
    # parse html and retrieve all href urls listed
    links = get_urls_from_html(data)
    # report progress
    print(f'Found {len(links)} links in {url}')
    # download each file on the webpage
    for link in links:
        # download the url to a local file
        link, outpath = download_url_to_file(url, link, path)
        # check for a link that was skipped
        if outpath is None:
            print(f'>skipped {link}')
        else:
            print(f'Downloaded {link} to {outpath}')
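
# Expected console output for a hypothetical listing with two entries:
#   Found 2 links in https://example.com/files/
#   >skipped ../
#   Downloaded a.txt to <home>/Downloads/DirDownloader/a.txt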
# url of html page that lists all files to download
URL = input("Enter link: ")
# local directory to save all files on the html page
PATH = str(Path.home() / "Downloads" / "DirDownloader")
# download all files on the html webpage
download_all_files(URL, PATH)
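
# Usage (assuming beautifulsoup4 is installed, e.g. pip install beautifulsoup4):
#   python DirDownld.py
# then paste the URL of the directory listing at the prompt.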