forked from FS-CSCI150-F21/FS-CSCI150-F21-Team4
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
550afe5
commit 6445ac0
Showing
7 changed files
with
432 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
__pycache__/ | ||
*.pyc | ||
.DS_Store | ||
.DS_Store | ||
jacobENV | ||
env |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import csv | ||
import numpy as np | ||
import pandas as pd | ||
import re | ||
import nltk | ||
import pickle | ||
from nltk.corpus import stopwords | ||
from sklearn.feature_extraction.text import TfidfVectorizer | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.linear_model import LogisticRegression | ||
|
||
# Train a TF-IDF + logistic-regression sentiment classifier from a CSV of
# labelled tweets, then pickle the fitted vectorizer and the classifier so
# they can be loaded later for prediction.

# Compiled once: the cleaning step runs for every tweet in the corpus.
_NON_WORD = re.compile(r'\W')
_INNER_SINGLE_CHAR = re.compile(r'\s+[a-zA-Z]\s+')
_LEADING_SINGLE_CHAR = re.compile(r'^[a-zA-Z]\s+')
_WHITESPACE_RUN = re.compile(r'\s+')
_LEADING_B = re.compile(r'^b\s+')


def _clean_text(text):
    """Return *text* normalised for vectorisation.

    Steps, in order: replace every non-word character with a space, drop
    single letters surrounded by whitespace, drop a single letter at the
    very start, collapse whitespace runs, strip a leading ``b`` marker and
    lowercase the result.
    """
    cleaned = _NON_WORD.sub(' ', text)
    cleaned = _INNER_SINGLE_CHAR.sub(' ', cleaned)
    # The previous pattern was r'\^[a-zA-Z]\s+': the escaped caret matched a
    # literal "^", which cannot exist after the non-word substitution above,
    # so this step never ran. An unescaped ^ anchors to the string start.
    cleaned = _LEADING_SINGLE_CHAR.sub(' ', cleaned)
    cleaned = _WHITESPACE_RUN.sub(' ', cleaned)
    cleaned = _LEADING_B.sub('', cleaned)
    return cleaned.lower()


features = []
labels = []

# An explicit encoding keeps the read independent of the platform default;
# undecodable bytes become U+FFFD, which the cleaning step turns into a
# space. newline='' is what the csv module expects so that quoted fields
# containing line breaks are parsed correctly.
with open("training.1600000.processed.noemoticon.csv", mode='r',
          encoding='utf-8', errors='replace', newline='') as csvOut:
    for row in csv.reader(csvOut, delimiter=','):
        # csv.reader yields [] for blank lines; indexing them would fail.
        if not row:
            continue
        # Every non-blank row is used (no header row is skipped):
        # column 5 holds the text, column 0 the label.
        features.append(row[5])
        labels.append(row[0])

# valRange is the fraction of the corpus to use, taken symmetrically around
# the middle of the file; 1 selects every row.
valRange = 1
numFeatStart = int(len(features) * (.5 - valRange / 2))
numFeatEnd = int(len(features) * (.5 + valRange / 2))
scaleLabel = labels[numFeatStart:numFeatEnd]

processed_features = [
    _clean_text(str(text)) for text in features[numFeatStart:numFeatEnd]
]

print("Finished Text Processing")
vectorizer = TfidfVectorizer(max_features=2500, min_df=7, max_df=0.8, stop_words=stopwords.words('english'))
# Keep the TF-IDF matrix sparse: a dense copy needs rows x 2500 floats, and
# both train_test_split and LogisticRegression accept sparse input.
processed_features = vectorizer.fit_transform(processed_features)
with open("vectorizer.pickle", "wb") as pickle_out:
    pickle.dump(vectorizer, pickle_out)
print(processed_features.shape)
print("Splitting Tests")
X_train, X_test, y_train, y_test = train_test_split(processed_features, scaleLabel, test_size=0.2, random_state=0)

print("Fitting Logistic Regression")
text_classifier = LogisticRegression(random_state=0)
text_classifier.fit(X_train, y_train)
with open("LogisticRegClass.pickle", "wb") as pickle_out:
    pickle.dump(text_classifier, pickle_out)
print("Fitting Completed")
# The held-out 20% split was previously created but never used; report the
# accuracy on it so a bad training run is visible.
print(f"Test accuracy: {text_classifier.score(X_test, y_test):.4f}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from twitterScrapeV1 import twitterMentionFunct, tweetFormatJson | ||
import json | ||
import re | ||
import pickle | ||
|
||
# Compiled once at import time; the cleaning step runs for every tweet.
_NON_WORD = re.compile(r'\W')
_INNER_SINGLE_CHAR = re.compile(r'\s+[a-zA-Z]\s+')
_LEADING_SINGLE_CHAR = re.compile(r'^[a-zA-Z]\s+')
_WHITESPACE_RUN = re.compile(r'\s+')
_LEADING_B = re.compile(r'^b\s+')


def _cleanTweetText(text):
    """Return *text* normalised the way the classifier's input is expected.

    Steps, in order: replace every non-word character with a space, drop
    single letters surrounded by whitespace, drop a single letter at the
    very start, collapse whitespace runs, strip a leading ``b`` marker and
    lowercase the result.
    """
    cleaned = _NON_WORD.sub(' ', text)
    cleaned = _INNER_SINGLE_CHAR.sub(' ', cleaned)
    # The previous pattern was r'\^[a-zA-Z]\s+': the escaped caret matched a
    # literal "^", which cannot exist after the non-word substitution above,
    # so this step never ran. An unescaped ^ anchors to the string start.
    # NOTE(review): this only removes one-letter tokens, which a
    # TfidfVectorizer with the default token pattern ignores anyway, so it
    # should not change predictions - confirm the pickled vectorizer uses
    # the default pattern.
    cleaned = _LEADING_SINGLE_CHAR.sub(' ', cleaned)
    cleaned = _WHITESPACE_RUN.sub(' ', cleaned)
    cleaned = _LEADING_B.sub('', cleaned)
    return cleaned.lower()


def tweetSentimentAnalyzer(userName, totalTweets):
    """Count positive and negative tweets mentioning *userName*.

    Fetches tweets through ``twitterMentionFunct``, writes them to
    ``tweetText.json`` via ``tweetFormatJson``, reads that file back, cleans
    each tweet's ``text`` and classifies it with the pickled vectorizer and
    logistic-regression model found in the working directory.

    Args:
        userName: passed to ``twitterMentionFunct`` as ``userName``.
        totalTweets: passed to ``twitterMentionFunct`` as ``tweetAmount``.

    Returns:
        A dict with the keys ``"tweet_postive"`` (spelling kept for
        backward compatibility with existing callers) and
        ``"tweet_negative"``, each mapping to an int count. Both are 0 when
        the JSON file holds no tweets.

    Raises:
        FileNotFoundError: if either pickle file or the JSON file is missing.
    """
    tweetData = twitterMentionFunct(userName=userName, tweetAmount=totalTweets)
    tweetFormatJson("tweetText.json", tweetData)

    # NOTE: pickle.load can execute arbitrary code; these files must only
    # ever be the locally produced training artefacts, never user uploads.
    with open("vectorizer.pickle", "rb") as pickle_in:
        processedVector = pickle.load(pickle_in)

    with open("LogisticRegClass.pickle", "rb") as pickle_in:
        logicRegClass = pickle.load(pickle_in)

    with open('tweetText.json', encoding='utf-8') as infile:
        tweetJson = json.load(infile)

    # A payload without a "data" list (presumably what comes back when no
    # mentions are found - verify against the scraper) used to raise
    # KeyError; treat it as "no tweets" instead.
    testEntries = tweetJson.get('data') or []
    if not testEntries:
        print("Number of Positive tweets: 0")
        print("Number of Negative tweets: 0")
        return {"tweet_postive": 0, "tweet_negative": 0}

    testProcessed = [_cleanTweetText(entry['text']) for entry in testEntries]

    # The sparse matrix is passed straight to predict(); converting it to a
    # dense array first only costs memory.
    processed_features = processedVector.transform(testProcessed)
    prediction = logicRegClass.predict(processed_features)
    print(processed_features.shape)
    print(prediction)
    predictionList = prediction.tolist()
    # Labels are compared as the strings '4' (positive) and '0' (negative);
    # this assumes the model was trained on string labels - TODO confirm.
    possitiveTweetsTot = predictionList.count('4')
    negativeTweetsTot = predictionList.count('0')
    print(f"Number of Positive tweets: {possitiveTweetsTot}")
    print(f"Number of Negative tweets: {negativeTweetsTot}")
    return {"tweet_postive": possitiveTweetsTot, "tweet_negative": negativeTweetsTot}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"data": [ | ||
{ | ||
"id": "1459741984906690565", | ||
"text": "What if @FoxStevensonNow finds out????? 🤣🤣🤣🤣 https://t.co/hDT79K8MQa" | ||
}, | ||
{ | ||
"id": "1459701494689710082", | ||
"text": "@FoxStevensonNow Hope you're having an amazing day!" | ||
}, | ||
{ | ||
"id": "1459664659276046345", | ||
"text": "Thinking about @FoxStevensonNow’s set at Ministry of Sound the other week https://t.co/7j5eFe3KlO" | ||
}, | ||
{ | ||
"id": "1459602779110993920", | ||
"text": "@itspaceboss @FoxStevensonNow Dude. Yes." | ||
}, | ||
{ | ||
"id": "1459563352183742469", | ||
"text": "WHEN TF WILL I SEE @FoxStevensonNow ON A FESTIVAL LINEUP ?!?!?" | ||
}, | ||
{ | ||
"id": "1459531436764053504", | ||
"text": "NOW IT'S TIME FOR THE VIP MIX!! @FoxStevensonNow & @CurbiOfficial - Hoohah (VIP Mix) PIANO COVER LETS GOOOO\n\nhttps://t.co/QBv3OTMR7O" | ||
}, | ||
{ | ||
"id": "1459529175446994950", | ||
"text": "@FoxStevensonNow @grabbitz @Griz your musics keep me dancing ♥️🤘🏻 https://t.co/t02otWfyLF" | ||
}, | ||
{ | ||
"id": "1459463690990891008", | ||
"text": "What’s the first ever @FoxStevensonNow song you guys heard? I heard Sandblast not long after it was released and played it on repeat for about a month 🙈 https://t.co/QmdqKXKuTJ" | ||
}, | ||
{ | ||
"id": "1459397723266949123", | ||
"text": "@PilotRecordsUK dude when i see @FoxStevensonNow s good time in stores for the first time ima cry!" | ||
}, | ||
{ | ||
"id": "1459386877702774785", | ||
"text": "@FoxStevensonNow https://t.co/FznKa7uYwT" | ||
}, | ||
{ | ||
"id": "1459314764249804802", | ||
"text": "@FuntCaseUK @FoxStevensonNow 4 inspiring me to make music" | ||
}, | ||
{ | ||
"id": "1459305433227743244", | ||
"text": "My evening was f*ing great @FoxStevensonNow . The day before lockdown 3.0! https://t.co/Z3661Df1km" | ||
}, | ||
{ | ||
"id": "1459016262231732247", | ||
"text": "@FoxStevensonNow \n\nFox live 2020: shortly after, corona time and lockdown in NL \nFox live 2021: shortly after, new lockdown in NL \n\nHmm 🤔" | ||
}, | ||
{ | ||
"id": "1458856882949472265", | ||
"text": "@UKF @FoxStevensonNow flat foot face" | ||
}, | ||
{ | ||
"id": "1458778563562332160", | ||
"text": "@PilotRecordsUK @FoxStevensonNow" | ||
} | ||
], | ||
"meta": { | ||
"oldest_id": "1458778563562332160", | ||
"newest_id": "1459741984906690565", | ||
"result_count": 15, | ||
"next_token": "7140dibdnow9c7btw3z2vwkh1h8yxd37jq43mhkug9168" | ||
} | ||
} |
Oops, something went wrong.