-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
56 lines (46 loc) · 2.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import date
# Discover how many result pages the "past launches" listing has, and set up
# the accumulator that the scraping loop fills with one dict per launch.
URL = "https://nextspaceflight.com/launches/past/?search="
# A timeout keeps the script from hanging indefinitely on a stalled connection
# (requests applies no timeout unless one is given).
response = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error rather than parsing an error page as a listing.
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, "html.parser")
# The raised "last »" pagination button links to the final page. Its href is
# parsed as "<something>=<page>&..." - presumably "?page=N&search=", matching
# the URLs requested per page; confirm against the live markup.
last_page_soup = soup.select_one('.mdc-button--raised:-soup-contains("last »")')
if last_page_soup is None:
    # No "last" button found: treat the listing as a single page instead of
    # crashing with an AttributeError on None.
    # NOTE(review): this also hides a markup change - verify if only one page
    # is ever scraped.
    last_page = 1
else:
    last_page = int(last_page_soup.get('href').split('=')[1].split('&')[0])
final_data = []
def _node_text(node):
    """Return the stripped text of a parsed HTML node, or "" if the node is None."""
    return node.get_text(strip=True) if node is not None else ""


def _strip_label(text):
    """Return what follows the first ": " in *text* (e.g. "Status: X" -> "X").

    When *text* contains no ": " separator it is returned unchanged instead
    of raising IndexError.
    """
    parts = text.split(': ')
    return parts[1] if len(parts) > 1 else text


def _parse_price(text):
    """Return the number that follows "$" in *text* as a float, or "" if none.

    Thousands separators are removed before conversion, so a value written
    as "1,160.0" is parsed rather than silently dropped. "" is returned when
    there is no "$" or the token after it is not numeric.
    """
    if "$" not in text:
        return ""
    amount = text.split('$')[1].split(' ')[0].replace(',', '')
    try:
        return float(amount)
    except ValueError:
        return ""


# Walk every listing page, follow each launch's "Details" link, and append one
# record per launch to final_data. A single Session reuses the connection to
# the host across the many requests made here.
with requests.Session() as session:
    for page_num in range(1, last_page + 1):
        listing_response = session.get(
            f"https://nextspaceflight.com/launches/past/?page={page_num}&search=",
            timeout=30,
        )
        listing_response.raise_for_status()
        listing_soup = BeautifulSoup(listing_response.text, "html.parser")

        # Three parallel lists taken from the listing page. They are paired
        # by position, which assumes the page yields them in the same order
        # and count per launch card - TODO confirm against the site markup.
        details_soup = listing_soup.select('h5')
        datetime_and_location_soup = listing_soup.select('.mdl-card__supporting-text')
        mission_details_link_soup = listing_soup.select('.mdc-button:-soup-contains("Details")')

        launches = zip(mission_details_link_soup, datetime_and_location_soup, details_soup)
        for details_link, datetime_and_location, details in launches:
            details_url = details_link.get('href')
            detail_response = session.get(
                f"https://nextspaceflight.com{details_url}",
                timeout=30,
            )
            detail_response.raise_for_status()
            # Kept separate from listing_soup so the two pages are never confused.
            detail_soup = BeautifulSoup(detail_response.text, "html.parser")

            mission_status = detail_soup.select_one('h6')
            organization = detail_soup.select_one('.a:first-child .mdl-cell:first-child')
            status = detail_soup.select_one('.a:first-child .mdl-cell:nth-of-type(2)')
            price = detail_soup.select_one('.a:first-child .mdl-cell:nth-of-type(3)')

            # The supporting text is split on its child text nodes: the first
            # piece is used as the datetime and the second as the location.
            datetime_and_location_split = datetime_and_location.get_text(
                strip=True, separator="#"
            ).split('#')
            location = (
                datetime_and_location_split[1]
                if len(datetime_and_location_split) > 1
                else ""
            )

            record = {
                "Organization": _node_text(organization),
                "Location": location,
                "Datetime": datetime_and_location_split[0],
                "Details": details.get_text(strip=True),
                "Status": _strip_label(_node_text(status)),
                "Price": _parse_price(_node_text(price)),
                "Mission_status": _node_text(mission_status),
            }
            final_data.append(record)
# Write every collected launch record to a CSV file whose name carries
# today's date, in the current working directory.
output_name = f"mission_launches_updated_{date.today()}.csv"
launches_frame = pd.DataFrame(final_data)
launches_frame.to_csv(output_name)