REST_gatherer.py
import json
import time

import requests

# Twitter REST search endpoint (v1, JSON) for the "nikeplus" query.
url = ("http://search.twitter.com/search.json"
       "?q=nikeplus&rpp=100&since_id=%s&include_entities=true&result_type=mixed")
previous_max_id = ""
sleep = 1

# Get where we last left off: the id of the last tweet stored in the output file.
with open("out_tweets.txt", 'r') as f:
    for line in f:
        tweet = json.loads(line)
        previous_max_id = tweet["id_str"]

# Data collection loop: sleep for a minute after every 2 calls.
request_count = 0
while True:
    r = requests.get(url % previous_max_id)
    request_count += 1
    if r.status_code == 200:
        try:
            reply = r.json()
        except TypeError:
            # Older versions of requests expose .json as a property, not a method.
            reply = r.json
        previous_max_id = reply['max_id_str']
        with open("out_tweets.txt", 'a') as f:
            for tweet in reply['results']:
                f.write(json.dumps(tweet, separators=(',', ':')) + '\n')
        print("Request number: %d  Number of results: %d"
              % (request_count, len(reply['results'])))
    else:
        print(r.status_code)
    # Toggle the sleep flag so we pause for 60 seconds after every other call.
    sleep ^= 1
    if sleep:
        time.sleep(60)
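
Once the gatherer has run for a while, the tweets it wrote to out_tweets.txt (one JSON object per line, as in the script above) can be read back for analysis. A minimal sketch, assuming the file exists in the working directory:

import json

# Load every tweet the gatherer has written so far (one JSON object per line).
tweets = []
with open("out_tweets.txt", "r") as f:
    for line in f:
        tweets.append(json.loads(line))

print("Collected %d tweets" % len(tweets))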