-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatamine.py
94 lines (69 loc) · 2.98 KB
/
datamine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from textblob import TextBlob
import json
import ConfigParser
import datetime
import re
import time
import processText as pt
import plotlySetup
import connectSQL
# Read the project settings file; everything below is looked up from it.
config = ConfigParser.ConfigParser()
config.read('/Users/JoeK/config_files/owconfig')

# Stop-word collection, loaded from the file named in the [stopwords] section.
stopwords = pt.getStopWordList(config.get('stopwords', 'stopword_file'))

# Twitter credentials from the [twittertokens] section, fetched in the
# same order as before and bound in one unpacking assignment.
(access_token,
 access_token_secret,
 consumer_key,
 consumer_secret) = (
    config.get('twittertokens', 'access_token'),
    config.get('twittertokens', 'access_token_secret'),
    config.get('twittertokens', 'consumer_key'),
    config.get('twittertokens', 'consumer_secret'),
)
class StdOutListener(StreamListener):
    """Stream listener that scores incoming tweets and records the results.

    For each English tweet that is not a retweet, ``on_data`` runs the text
    through the ``pt`` helpers, drops stop words, computes TextBlob
    sentiment, stores a row through ``connectSQL.insertSQL`` and writes the
    running polarity total to the plotly stream.

    NOTE: ``on_data`` reads the module-level names ``Tweets``, ``session``
    and ``plotly_stream``. This file binds them only inside the
    ``__main__`` guard, so they must exist before the stream is started.
    """

    def __init__(self):
        super(StdOutListener, self).__init__()
        # Running sum of the polarity of every tweet processed so far.
        self.accumulated_sentiment = 0.0

    def on_status(self, status):
        """Print the text of ``status``."""
        print(status.text)

    def on_data(self, data):
        """Process one raw message received from the stream.

        Args:
            data: JSON-encoded string for a single stream message.

        Returns:
            None on every path, whether the message was skipped or
            processed.
        """
        tweet = json.loads(data)

        # Skip messages without text, official retweets and non-English
        # tweets.  .get() is used so a message missing 'retweeted' or
        # 'lang' is filtered instead of raising KeyError.
        if ('text' not in tweet or tweet.get('retweeted')
                or tweet.get('lang') != 'en'):
            return

        # Manual retweets carry the "RT @" marker in the text itself.
        if 'RT @' in tweet['text']:
            return

        text = pt.processTweet(tweet['text'])
        text = pt.replaceTwoOrMore(text)

        # Build a new list rather than removing items from the list being
        # iterated: removing in place skips the element that follows each
        # removal, which let consecutive stop words slip through.
        feature_vector = [word for word in pt.getFeatureVector(text)
                          if word not in stopwords]

        # The stop-word-free text is what gets analysed and stored.
        text = ' '.join(feature_vector)
        tweet_date, tweet_time = pt.parseDateTime(tweet['created_at'])

        sentiment = TextBlob(text).sentiment
        self.accumulated_sentiment += sentiment.polarity

        # persist data in MySQL
        connectSQL.insertSQL(Tweets, session, tweet_date, tweet_time,
                             sentiment.polarity, sentiment.subjectivity, text)

        # Plot the running total against the local wall-clock time.
        x = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
        y = self.accumulated_sentiment
        plotly_stream.write(dict(x=x, y=y))

        # NOTE(review): pauses one second after each processed tweet,
        # presumably to throttle the plotly writes -- confirm.
        time.sleep(1)

        # Single-argument print gives the same output as the old
        # comma-separated print statement and also parses on Python 3.
        print('Accumulated Sentiment: %s Twitter User ID: %s'
              % (self.accumulated_sentiment, tweet['user']['id']))

    def on_error(self, status):
        """Handle an error status code reported by the stream.

        Args:
            status: status code passed in by the stream.

        Returns:
            False when ``status`` is 420; otherwise falls through and
            returns None.
        """
        if status == 420:
            # returning False in on_error disconnects the stream
            return False
if __name__ == '__main__':
    # Twitter side: OAuth credentials first, then the listener that will
    # receive the messages, then the stream object joining the two.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    listener = StdOutListener()
    twitter_stream = Stream(auth, listener)

    # Output side: both results are module-level names that
    # StdOutListener.on_data looks up, so they are bound before streaming.
    plotly_stream = plotlySetup.setupPlotly(600)
    Tweets, session = connectSQL.connectSQL(config)

    # Start consuming tweets that match the tracked keyword.
    twitter_stream.filter(track=['overwatch'])