#!/usr/bin/env python2.7
"""
A small utility to save or delete all of your personally-posted or liked tweets.
John Loeber | contact@johnloeber.com | January 13, 2017 | Python 2.7.6
"""
import dateutil.parser
import twitter
import json
import os
import sys
import logging
import argparse
import ConfigParser
import urllib
from time import time, sleep
from math import ceil

# to save logs to a file, consider e.g. logging.basicConfig(filename="twitter-tool-x.log"),
# where x is a unique identifier for this run (e.g. a timestamp).
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# the twitter api session is created in credentials_and_authenticate() (called from main());
# declared here so it is available as a module-level global
api = None

def download_media(folder_path, media_url, fallback_filename):
    """
    Download a media file contained in a tweet.
    - folder_path: string, the name of the folder in which to save the file
    - media_url: string, url to the media file
    - fallback_filename: string, filename to use when a name cannot be derived from the media url
    """
logging.info("Preparing to download media: " + media_url)
if "/media/" in media_url:
# I am not entirely sure if all media_urls contain "/media/", hence this conditional
media_suffix = media_url[media_url.index("/media/")+7:]
else:
extension = "." + media_url.split(".")[-1]
media_suffix = fallback_filename + extension
file_path = folder_path + "/" + media_suffix
# often, the media item in 'entities' is also in 'extended_entities'. don't download twice.
    if media_suffix not in os.listdir(folder_path):
logging.info("Downloading media: " + media_url)
urllib.urlretrieve(media_url, file_path)
else:
logging.info("Skipped duplicate media download: " + media_url)

def archive_single_tweet(tweet, archive_name, id_str, media):
"""
Archives a single tweet.
- tweet: twitter.Status, representing the tweet
- archive_name: string, the folder in which this tweet is to have its archive sub-folder
- id_str: string, the tweet's unique identifier
- media: boolean, save attached media if True.
"""
tweet_as_dict = tweet.AsDict()
logging.info("Archiving tweet id: " + id_str)
created_at = dateutil.parser.parse(tweet_as_dict['created_at'])
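    # each archived tweet gets its own sub-folder, named <creation-timestamp>-<tweet-id>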
folder_name = created_at.strftime("%Y-%m-%d-%H:%M:%S") + "-" + id_str
folder_path = archive_name + "/" + folder_name
if os.path.exists(folder_path):
logging.info("Trying to archive tweet: " + folder_name + "\n\tArchive folder already exists. Proceeding anyway.")
else:
os.makedirs(folder_path)
file_name = "tweet-" + id_str
file_path = folder_path + "/" + file_name + ".json"
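    # _json holds the raw dict that the Twitter API returned for this tweet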
tweet_as_json = tweet.__dict__['_json']
with open(file_path, "w") as f:
json.dump(tweet_as_json, f, indent=4, sort_keys=True, separators=(',', ':'))
if media:
# handle media attachments
if 'media' in tweet_as_json['entities']:
tweet_entities_media = tweet_as_json['entities']['media']
for media_index, media_item in enumerate(tweet_entities_media):
fallback_file_name = "media_" + str(media_index)
download_media(folder_path, media_item['media_url'], fallback_file_name)
if 'extended_entities' in tweet_as_json:
if 'media' in tweet_as_json['extended_entities']:
tweet_ee_media = tweet_as_json['extended_entities']['media']
for media_index, media_item in enumerate(tweet_ee_media):
fallback_file_name = "extended_media_" + str(media_index)
download_media(folder_path, media_item['media_url'], fallback_file_name)

def handle_single_liked_tweet(tweet, archive, delete, media):
    """
    archives or deletes a single liked tweet.
- tweet: twitter.Status, representing the tweet
- archive: boolean, saving the tweet if True
- delete: boolean, un-liking the tweet if True
- media: boolean, saving the tweet's media if True (and if archive is True)
"""
id_str = tweet.__dict__['id_str']
logging.info("Handling tweet id: " + id_str)
if archive:
archive_name = "Archive-Liked-Tweets"
archive_single_tweet(tweet, archive_name, id_str, media)
if delete:
logging.info("Un-liking tweet: " + id_str)
api.DestroyFavorite(status_id=tweet.__dict__['id'])

def handle_single_personal_tweet(tweet, archive, delete, media):
"""
archives or deletes a single personal tweet.
- tweet: twitter.Status, representing the tweet
- archive: boolean, saving the tweet if True
- delete: boolean, deleting the tweet if True
- media: boolean, saving the tweet's media if True (and if archive is True)
"""
id_str = tweet.__dict__['id_str']
logging.info("Handling tweet id: " + id_str)
if archive:
archive_name = "Archive-Personal-Tweets"
archive_single_tweet(tweet, archive_name, id_str, media)
if delete:
logging.info("Deleting tweet: " + id_str)
api.DestroyStatus(status_id=tweet.__dict__['id'])

def handle_liked_tweets(archive, delete, media):
"""
archives or deletes as many liked tweets as possible. (see README for limits.)
- archive: boolean, saving the tweets if True
- delete: boolean, un-liking the tweets if True
- media: boolean, saving each tweet's media if True (and if archive is True)
"""
if not os.path.exists("Archive-Liked-Tweets"):
os.makedirs("Archive-Liked-Tweets")
liked_ratelimit = api.CheckRateLimit("https://api.twitter.com/1.1/favorites/list.json")
remaining = liked_ratelimit.remaining
reset_timestamp = liked_ratelimit.reset
logging.info("Rate Limit Status: " + str(remaining) + " calls to `favorites` remaining in this 15-minute time period.")
if remaining > 0:
logging.info("Retrieving a new batch of favorites!")
favorites = api.GetFavorites(count=200)
for favorite in favorites:
handle_single_liked_tweet(favorite, archive, delete, media)
if len(favorites) == 0:
logging.info("There are no more liked tweets to handle!")
else:
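            # this batch was non-empty, so more liked tweets may remain; recurse to fetch another batch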
handle_liked_tweets(archive, delete, media)
else:
logging.info("Rate limit has been hit! Sleeping until rate limit resets.")
        # reset_timestamp is the unix time at which the rate limit window resets
        seconds_until_reset = int(ceil(reset_timestamp - time()))
sleep(seconds_until_reset)
handle_liked_tweets(archive, delete, media)

def handle_personal_tweets(archive, delete, media):
"""
archives or deletes as many personal tweets as possible. (see README for limits.)
- archive: boolean, saving the tweets if True
- delete: boolean, deleting the tweets if True
- media: boolean, saving each tweet's media if True (and if archive is True)
"""
if not os.path.exists("Archive-Personal-Tweets"):
os.makedirs("Archive-Personal-Tweets")
usertimeline_ratelimit = api.CheckRateLimit("https://api.twitter.com/1.1/statuses/user_timeline.json")
remaining = usertimeline_ratelimit.remaining
reset_timestamp = usertimeline_ratelimit.reset
logging.info("Rate Limit Status: " + str(remaining) + " calls to `user_timeline` remaining in this 15-minute time period.")
if remaining > 0:
logging.info("Retrieving a new batch of personal tweets!")
tweets = api.GetUserTimeline(count=200)
for tweet in tweets:
handle_single_personal_tweet(tweet, archive, delete, media)
if len(tweets) == 0:
logging.info("There are no more personal tweets to handle!")
else:
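            # this batch was non-empty, so more personal tweets may remain; recurse to fetch another batch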
handle_personal_tweets(archive, delete, media)
else:
logging.info("Rate limit has been hit! Sleeping until rate limit resets.")
        seconds_until_reset = int(ceil(reset_timestamp - time()))
sleep(seconds_until_reset)
handle_personal_tweets(archive, delete, media)

def arguments_and_confirm():
"""
handle the user's command-line arguments, ensure input is valid,
confirm the user's intention.
"""
parser = argparse.ArgumentParser(description='See README for help with running this program.')
group = parser.add_mutually_exclusive_group()
group.add_argument("--liked", help="use this flag to handle liked/favorited tweets.",
action="store_true", default=False)
group.add_argument("--posted", help="use this flag to handle tweets that you have authored (retweets included).",
action="store_true", default=False)
parser.add_argument("--archive", help="use this flag to archive (save) tweets.",
action="store_true", default=False)
parser.add_argument("--delete", help="use this flag to delete/un-like tweets.",
action="store_true", default=False)
parser.add_argument("--media", help="use this flag to save media files attached to tweets, if archiving.",
action="store_true", default=False)
args = parser.parse_args()
if not (args.posted or args.liked):
raise ValueError("You must supply either the --posted or --liked flag to specify whether "
"you want to handle the tweets that you made/retweeted, or the tweets you liked. "
"\nPlease see README for instructions.")
elif (not args.archive) and args.media:
raise ValueError("You have selected not to archive, but to save media. This is impossible. "
"You can only save media if you're archiving.\nPlease see README for instructions.")
elif not (args.archive or args.delete):
raise ValueError("You must supply at least one of the --archive or --delete flags, to "
"specify what you want to do with the selected tweets. "
"\nPlease see README for instructions.")
else:
option_string = "You have selected: "
if args.archive:
option_string += "to ARCHIVE "
if args.media:
option_string += "(and save media files) "
if args.delete:
if args.posted:
option_string += "and DELETE "
else:
option_string += "and UN-LIKE "
else:
if args.posted:
option_string += "to DELETE "
else:
option_string += "to UN-LIKE "
if args.posted:
liked_or_personal = 'personal'
option_string += "ALL tweets you have POSTED (including retweets)."
else:
liked_or_personal = 'liked'
option_string += "ALL tweets you have LIKED."
print option_string
while True:
# loop in case the user does not confirm correctly.
confirm = raw_input("Please confirm. Yes/No\n").lower()
if len(confirm) >= 1:
if confirm[0] == 'y':
return liked_or_personal, args.archive, args.delete, args.media
elif confirm[0] == 'n':
sys.exit(0)

def credentials_and_authenticate():
"""
parse credentials from credentials.txt and authenticate with Twitter.
"""
config = ConfigParser.ConfigParser()
config.read('credentials.txt')
consumer_key = config.get('TWITTER-TOOL', 'consumer_key')
consumer_secret = config.get('TWITTER-TOOL', 'consumer_secret')
access_token_key = config.get('TWITTER-TOOL', 'access_token_key')
access_token_secret = config.get('TWITTER-TOOL', 'access_token_secret')
global api
api = twitter.Api(consumer_key=consumer_key,
consumer_secret=consumer_secret,
access_token_key=access_token_key,
access_token_secret=access_token_secret)
# returning api instead of None so that it's possible to import and use this from the REPL.
return api

def main():
tweet_type, archive, delete, media = arguments_and_confirm()
credentials_and_authenticate()
if tweet_type == "personal":
handle_personal_tweets(archive, delete, media)
else:
handle_liked_tweets(archive, delete, media)
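
# example invocation (see arguments_and_confirm for the available flags):
#   python2.7 TwitterTool.py --posted --archive --media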
if __name__ == '__main__':
main()