-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
61 lines (52 loc) · 2.05 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import urllib.request
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By
class myBot:
    """Download the posts linked from one Instagram profile page via Chrome.

    Constructing an instance performs the whole run: it opens the profile
    page, scrolls until the page height stops changing, collects every link
    whose URL contains ``/p/``, then visits each post and saves the media
    referenced by its Open Graph ``<meta>`` tags into the current working
    directory as ``<shortcode>.mp4`` or ``<shortcode>.jpg``.

    Attributes:
        driver: The ``webdriver.Chrome`` instance. It is left open after a
            successful run and is quit if the run raises.
    """

    # JavaScript that scrolls to the bottom of the page and returns the
    # resulting document height.
    _SCROLL_SCRIPT = (
        "window.scrollTo(0, document.body.scrollHeight); "
        "var lengthOfPage = document.body.scrollHeight; return lengthOfPage;"
    )

    def __init__(
        self,
        profile_url="https://instagram.com/username",
        load_wait=15,
        scroll_wait=5,
    ):
        """Run the scrape-and-download sequence.

        Args:
            profile_url: Address of the profile page to open.
            load_wait: Seconds to sleep after opening the profile and before
                scrolling starts.
            scroll_wait: Seconds to sleep between two consecutive scrolls.

        Raises:
            Whatever Selenium or ``urllib`` raises; the browser is closed
            before the exception propagates.
        """
        self.driver = webdriver.Chrome()
        try:
            self.driver.maximize_window()
            self.driver.get(profile_url)
            sleep(load_wait)

            self._scroll_to_bottom(scroll_wait)

            posts = self._collect_post_links()
            print(posts)

            for post in posts:
                print(self._download_post(post))
        except BaseException:
            # If the constructor fails, no caller ever receives a reference
            # to the driver, so close the browser here to avoid leaking it.
            self.driver.quit()
            raise

    def _scroll_to_bottom(self, scroll_wait):
        """Scroll down repeatedly until the page height stops changing."""
        height = self.driver.execute_script(self._SCROLL_SCRIPT)
        while True:
            previous_height = height
            sleep(scroll_wait)
            height = self.driver.execute_script(self._SCROLL_SCRIPT)
            # Same height twice in a row: nothing more was appended.
            if height == previous_height:
                break

    def _collect_post_links(self):
        """Return the unique ``href`` values containing ``/p/``, in page order."""
        posts = []
        seen = set()
        for link in self.driver.find_elements(By.TAG_NAME, "a"):
            href = link.get_attribute("href")
            # Anchors without an href yield None; skip them rather than
            # failing on the substring test. Also skip repeated URLs so no
            # post is downloaded twice.
            if not href or "/p/" not in href or href in seen:
                continue
            seen.add(href)
            posts.append(href)
        return posts

    def _meta_content(self, prop):
        """Return the ``content`` of the ``<meta>`` tag with this ``property``."""
        xpath = '//meta[@property="{}"]'.format(prop)
        return self.driver.find_element(By.XPATH, xpath).get_attribute("content")

    def _download_post(self, post):
        """Open one post, save its media file and return the media URL."""
        self.driver.get(post)
        # Second-to-last path segment of the loaded URL; this assumes the
        # URL ends with "/<shortcode>/" - TODO confirm for all post URLs.
        shortcode = self.driver.current_url.split("/")[-2]

        media_type = self._meta_content("og:type")
        if media_type == "video":
            download_url = self._meta_content("og:video")
            filename = "{}.mp4".format(shortcode)
        else:
            # Every non-video type is saved from og:image as a .jpg file.
            download_url = self._meta_content("og:image")
            filename = "{}.jpg".format(shortcode)

        urllib.request.urlretrieve(download_url, filename)
        return download_url
# Creating the bot runs its constructor, which drives the browser and
# downloads the posts; the instance is kept in ``myapp``.
myapp = myBot()