-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAllcroller.py
156 lines (117 loc) · 4.72 KB
/
Allcroller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import tickertick as tt
import tickertick.query as query
import requests
from bs4 import BeautifulSoup
from soup2dict import convert
import json
from datetime import datetime
import yfinance as yf
import pandas as pd
from tqdm import tqdm
def update_time(d):
    """Recursively replace every 'time' value (epoch milliseconds) in a
    nested dict/list structure with a local-time ``datetime``, in place."""
    if isinstance(d, list):
        for element in d:
            update_time(element)
    elif isinstance(d, dict):
        for k, v in d.items():
            if k == 'time':
                # API timestamps are epoch milliseconds; floor-divide to seconds.
                d[k] = datetime.fromtimestamp(v // 1000)
            elif isinstance(v, (dict, list)):
                update_time(v)
def split_ticker(d):
    """Collapse each story's single-element 'tickers' list into its lone
    ticker string, modifying *d* in place. Multi-element lists and
    non-list values are left untouched."""
    for entry in d['stories']:
        tickers = entry['tickers']
        if isinstance(tickers, list) and len(tickers) == 1:
            entry['tickers'] = tickers[0]
def get_news(ticker, cnt):
    """Fetch up to *cnt* news stories for *ticker* from the TickerTick API.

    On success the JSON payload is parsed, 'time' fields are converted to
    datetimes, single-element ticker lists are flattened, noisy fields
    ('description', 'tags', 'site', 'last_id') are dropped, and the result
    is cached to '{ticker}.json' and returned.

    On a non-200 response the previously cached '{ticker}.json' is loaded
    and returned instead. (The original returned the undefined name
    ``parsed_data`` on that path, raising NameError.)

    Raises FileNotFoundError on the fallback path if no cache file exists.
    """
    url = f"https://api.tickertick.com/feed?q=z:{ticker}&n={cnt}"
    response = requests.get(url)
    if response.status_code == 200:
        # The endpoint returns JSON; BeautifulSoup + convert are used here
        # only to extract the raw text node, which is then json-parsed.
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        dict_result = convert(soup)
        navigable_string = dict_result["navigablestring"][0]
        parsed_data = json.loads(navigable_string)
        update_time(parsed_data)
        split_ticker(parsed_data)
        for story in parsed_data.get('stories', []):
            # pop with default instead of del: a story missing one of these
            # optional fields used to raise KeyError.
            story.pop('description', None)
            story.pop('tags', None)
            story.pop('site', None)
        parsed_data.pop('last_id', None)
        # Save to a JSON cache file
        with open(f'{ticker}.json', 'w', encoding='utf-8') as output_file:
            json.dump(parsed_data, output_file, default=str, indent=2, ensure_ascii=False)
        return parsed_data
    print(response.status_code)
    # Fall back to the cached copy written by a previous successful call.
    with open(f'{ticker}.json', 'r', encoding='utf-8') as file:
        return json.load(file)
def extract_tickers_from_json(file_path):
    """Read a JSON array of company records from *file_path* and return
    the list of their 'ticker' values, in file order."""
    with open(file_path, 'r', encoding='utf-8') as json_file:
        records = json.load(json_file)
    return [record['ticker'] for record in records]
def main():
    """Scrape the companiesmarketcap.com top list, save it to Toplist.json,
    POST it to the local stock service, then POST cached per-ticker news
    files to the local news service.

    Fixes over the original:
    - On a non-200 scrape response the original fell through and POSTed the
      undefined name ``data`` (NameError); we now print the status and return.
    - In the daily-change loop ``today`` was unbound on the first iteration
      (or stale afterwards) when a span carried neither percentage class;
      it is now only assigned when a direction was actually detected.
    """
    crawling_url = "https://companiesmarketcap.com/"
    response = requests.get(crawling_url)
    if response.status_code != 200:
        print(response.status_code)
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Company name / ticker / logo, one record per name cell.
    data = []
    for td in soup.find_all('td', class_='name-td'):
        company_name = td.find('div', class_='company-name').text.strip()
        company_code = td.find('div', class_='company-code').text.strip()
        company_logo = td.find('img', class_='company-logo')['src'].strip()
        data.append({
            'name': company_name,
            'ticker': company_code,
            'logoUrl': "https://companiesmarketcap.com/" + company_logo
        })

    # Pair each '$...' price cell with the companies in order; cells ending
    # in a letter (e.g. market caps like '$3.1T') are skipped.
    idx = 0
    for td in soup.find_all('td', class_='td-right'):
        price = td.text.strip()
        if price.startswith('$') and not price[-1].isalpha():
            data[idx]['price'] = price
            idx += 1

    # Daily percentage change: 'percentage-red' span = loss, 'percentage-green' = gain.
    idx = 0
    for td_tag in soup.find_all('td', class_='rh-sm'):
        span_tag = td_tag.find('span')
        if span_tag:
            today = None
            if 'percentage-red' in span_tag['class']:
                today = '-' + span_tag.get_text(strip=True).split('-')[-1]
            elif 'percentage-green' in span_tag['class']:
                today = '+' + span_tag.get_text(strip=True).split('+')[-1]
            if today is not None:
                data[idx]['today'] = today
            idx += 1

    with open('Toplist.json', 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)

    # Push the top list to the local backend.
    top_url = 'http://localhost:8080/stocks/top100'
    headers = {'Content-Type': 'application/json; charset=utf-8'}
    post_response = requests.post(top_url, headers=headers, json=data)
    if post_response.status_code == 204:
        print('complete')
    else:
        print("error")

    print("Starting News Post")
    ticker_list = extract_tickers_from_json('Toplist.json')
    news_url = "http://localhost:8080/news"
    with tqdm(total=len(ticker_list)) as pbar:
        for ticker in ticker_list:
            # data = get_news(ticker, 10)  # live fetch disabled; read pre-cached files
            path = f'test\\{ticker}.json'
            with open(path, 'r', encoding='utf-8') as file:
                payload = json.load(file)
            news_response = requests.post(news_url, headers=headers, json=payload)
            if news_response.status_code != 204:
                print(f'error occur : {news_response.status_code}')
                break
            pbar.update(1)
# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()