crawler.py
import requests
import json


def crawl(_id, _filename):
    # Steam store review endpoint for the given app id.
    URL = "https://store.steampowered.com/appreviews/" + str(_id)
    PARAMS = {
        'json': 1,
        'cursor': '*',  # '*' requests the first page; later pages use the cursor returned by the API
        'day_range': 9223372036854775807
    }
    data = []

    # First request: report how many reviews match, then collect the first page.
    r = requests.get(url=URL, params=PARAMS)
    data1 = r.json()
    print("Found total matching results: " + str(data1['query_summary']['total_reviews']))
    print("retrieved " + str(len(data1['reviews'])) + " reviews")
    data.extend(data1['reviews'])
    PARAMS['cursor'] = data1['cursor']  # advance the cursor before looping, otherwise page 1 is fetched twice

    # Fetch up to 250 further pages, advancing the cursor after each response.
    for i in range(250):
        r = requests.get(url=URL, params=PARAMS)
        data1 = r.json()
        PARAMS['cursor'] = data1['cursor']
        print("retrieved " + str(len(data1['reviews'])) + " more reviews")
        data.extend(data1['reviews'])

    # Drop duplicate reviews, since overlapping pages can be returned.
    res = []
    for review in data:
        if review not in res:
            res.append(review)

    # Write the de-duplicated reviews out as JSON.
    with open('data/' + _filename + '.txt', 'w') as outfile:
        json.dump(res, outfile)
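

# Example usage (a minimal sketch, not part of the original script): the filename is
# arbitrary, and writing the output assumes a local 'data/' directory already exists.
if __name__ == "__main__":
    crawl(570, 'dota2_reviews')  # 570 is the Steam app id for Dota 2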