-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata.py
126 lines (124 loc) · 4.6 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# For each key in data
# KEY: Provider name
#
# VALUES:
# * re: tuple of (0, 1): 0: regex used to validate the url
# 1: index of the regex group that holds the ID of the gif, which we need to name files
#
# * op: A direct url that can use "ID" from "re" to download source directly
#
# * ext: extension of file
# - main: gif --> will be downloaded directly
# - else: mp4, webm, ... --> will be passed to ffmpeg to be converted to gif
#
# * scrape: if not None, then we add a script that can get url from HTML
# - Will be passed to eval()
# - Preferred to be short and efficient
# 15 Providers (17 entries: Tenor and Imgur each have an extra ": Source" entry)
def _provider(pattern, id_group, ext, op=None, scrape=None):
    """Assemble one provider record with the keys 're', 'op', 'ext', 'scrape'.

    pattern:  regex matched against a candidate URL.
    id_group: index of the capture group in `pattern` that yields the name/ID.
    ext:      extension of the fetched file ('gif', 'mp4', ...).
    op:       direct-download URL template with a '{}' placeholder, or None.
    scrape:   expression string used to locate the media URL, or None.
    """
    return {'re': (pattern, id_group), 'op': op, 'ext': ext, 'scrape': scrape}


# Provider table, keyed by display name. Insertion order is kept as-is.
# Every pattern starts with the optional scheme group, which itself contains a
# nested group, so the first provider-specific group is never numbered below 3.
# The scrape expressions refer to names such as `soup`, `url_input`, `re` and
# `requests`; presumably those are in scope wherever the expression is
# evaluated -- confirm against the caller.
data = {
    'Giphy': _provider(
        r'((http|https):\/\/)?(www\.)?giphy\.com\/gifs\/(([a-z-]+)(?<=-))?([a-zA-Z0-9-]+)', 6,
        ext='gif',
        op='https://media.giphy.com/media/{}/source.gif',
    ),
    'Tenor': _provider(
        r'((http|https):\/\/)?(www\.)?tenor\.com\/view\/(.*)(?=-)-gif-(\d+)', 4,
        ext='gif',
        scrape='soup.find("meta", {"property":"og:image"})["content"]',
    ),
    'Tenor : Source': _provider(
        r'((http|https):\/\/)?media\d\.tenor\.co\/images\/([a-f0-9]+)\/tenor\.gif', 3,
        ext='gif',
        scrape='url_input',
    ),
    'Gfycat': _provider(
        r'((http|https):\/\/)?(www\.)?gfycat\.com\/([a-z-]+)', 4,
        ext='gif',
        scrape='soup.find("meta", {"property":"og:url"})["content"]',
    ),
    'Reaction GIFs': _provider(
        r'((http|https):\/\/)?(www\.)?reactiongifs\.com\/([a-z0-9-]+)', 4,
        ext='gif',
        scrape='soup.find("meta", {"property":"og:image"})["content"]',
    ),
    'GIFbin': _provider(
        r'((http|https):\/\/)?(www\.)?gifbin\.com\/(\d+)', 4,
        ext='mp4',
        scrape='soup.find("source", {"type":"video/mp4"})["src"]',
    ),
    'Imgflip': _provider(
        r'((http|https):\/\/)?(www\.)?imgflip\.com\/gif\/([a-z0-9]+)', 4,
        ext='gif',
        op='https://i-download.imgflip.com/{}.gif',
    ),
    'Tumblr': _provider(
        r'((http|https):\/\/)?\w+\.tumblr\.com\/post\/(\d+)', 3,
        ext='gif',
        scrape='soup.find("meta", {"property":"og:image"})["content"][:-1]',
    ),
    'Twitter': _provider(
        r'((http|https):\/\/)?(www\.)?twitter\.com\/(.*)(?<=status\/)(\d+)', 5,
        ext='mp4',
        # Two adjacent literals; Python joins them into a single expression.
        scrape=(
            '"https://video.twimg.com/tweet_video/"+soup.find("meta",'
            '{"property":"og:image"})["content"][:-4].split("/")[-1]+".mp4"'
        ),
    ),
    'Imgur': _provider(
        r'((http|https):\/\/)?i\.imgur\.com\/([A-Za-z0-9]+)', 3,
        ext='mp4',
        scrape='soup.find("meta", {"property":"og:video"})["content"]',
    ),
    'Imgur : Source': _provider(
        r'((http|https):\/\/)?(www\.)?imgur\.com\/(gallery|\w)\/([a-z]+\/)?([A-Za-z0-9]+)', 6,
        ext='mp4',
        scrape='url_input',
    ),
    'Reddit': _provider(
        r'((http|https):\/\/)?((www|old)\.)?reddit\.com\/r\/(.*)(?<=comments\/)([a-z0-9_\/]+)', 6,
        ext='gif',
        # The first literal is raw so the embedded regex keeps its backslashes.
        scrape=(
            r'''(re.search(r'https:\/\/preview.redd.it\/([a-z0-9]+).gif\?s=[a-f0-9]+','''
            '''requests.get(url_input+".json", headers={'User-Agent': 'Mozilla/5.0'}).text)).group(0)'''
        ),
    ),
    '9GAG': _provider(
        r'((http|https):\/\/)?(www\.)?9gag\.com\/\w+\/([A-Za-z0-9]+)', 4,
        ext='mp4',
        op='https://img-9gag-fun.9cache.com/photo/{}_460sv.mp4',
    ),
    'Sizzle': _provider(
        r'((http|https):\/\/)?(www\.)?onsizzle\.com\/i\/([A-Za-z0-9-]+)(?=-)-([a-f0-9]+)', 4,
        ext='mp4',
        scrape='soup.find("source", {"type":"video/mp4"})["src"]',
    ),
    'PANA.GIFS': _provider(
        r'((http|https):\/\/)?(www\.)?panagif\.com\/gif\/([A-Za-z0-9-]+)', 4,
        ext='gif',
        scrape='soup.find("source", {"type":"image/gif"})["src"]',
    ),
    'Gifer': _provider(
        r'((http|https):\/\/)?(www\.)?gifer\.com(\/\w\w)?\/([A-Za-z0-9]+)', 5,
        ext='gif',
        op='https://i.gifer.com/embedded/download/{}.gif',
    ),
    'TikTok': _provider(
        r'((http|https):\/\/)?(www\.)?tiktok\.com\/@(.*)(?<=video\/)(\d+)', 5,
        ext='mp4',
        scrape='soup.find("video")["src"]',
    ),
}