-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrench_game_tweet_toxicity_web_app.py
61 lines (43 loc) · 1.44 KB
/
french_game_tweet_toxicity_web_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from flask import Flask, request, jsonify, render_template
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
import nltk
from nltk.corpus import stopwords
import string
from nltk.stem.porter import PorterStemmer
# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# NOTE: this vectorizer is never fitted or referenced in this section -- inference
# uses the pickled `vectorizer` loaded below. It is kept as a module-level name in
# case code outside this view refers to it.
tfidf = TfidfVectorizer(max_features=3000)

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so both
# artifacts must come from a trusted source.
# The context managers close the file handles, which the bare open() calls leaked.
with open('vectorizer', 'rb') as _vectorizer_file:
    vectorizer = pickle.load(_vectorizer_file)
with open('game_tweet_model.pkl', 'rb') as _model_file:
    model = pickle.load(_model_file)

# Fetch the NLTK resources at import time; transform_text relies on NLTK's
# tokenizer and stopword corpus.
nltk.download('stopwords')
nltk.download('punkt')

# Shared stemmer instance used by transform_text.
ps = PorterStemmer()
def transform_text(text):
    """Normalise raw tweet text into the space-separated stem string fed to the vectorizer.

    Steps, in order:
      1. lowercase the text;
      2. tokenize it with ``nltk.word_tokenize``;
      3. keep only alphanumeric tokens;
      4. drop English stopwords (and anything found in ``string.punctuation``);
      5. stem each remaining token with the module-level Porter stemmer ``ps``.

    Args:
        text: The raw input string.

    Returns:
        The surviving stems joined by single spaces; an empty string when
        no token survives the filters.
    """
    # Build the stopword set once per call. The original evaluated
    # stopwords.words('english') for every token, repeating that call and
    # doing a linear list scan per membership test.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # The punctuation test is kept for parity with the original pipeline even
    # though alphanumeric tokens cannot match it.
    kept = [
        token
        for token in tokens
        if token.isalnum()
        and token not in stop_words
        and token not in string.punctuation
    ]

    return " ".join(ps.stem(token) for token in kept)
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted text and re-render the page with the verdict.

    Reads the ``text`` form field, normalises it with ``transform_text``,
    vectorises it with the pickled ``vectorizer`` and passes the result to
    ``model.predict``.

    Returns:
        The rendered ``index.html`` template with ``result`` set to
        ``"toxic"`` when the first predicted label equals 1, and
        ``"not toxic"`` otherwise.
    """
    # form.get() returns None when the field is absent, which made
    # transform_text fail on None.lower(). Defaulting to "" sends a missing
    # field down the same path as an empty submission.
    text = request.form.get('text', '')

    tweet = transform_text(text)
    features = vectorizer.transform([tweet])
    prediction = model.predict(features)

    toxicity_label = "toxic" if prediction[0] == 1 else "not toxic"
    return render_template('index.html', result=toxicity_label)
if __name__ == '__main__':
    import os

    # SECURITY: debug mode enables the interactive debugger, which lets anyone
    # who can reach the server run arbitrary Python. It stays on by default to
    # preserve the existing local-development behaviour; set FLASK_DEBUG=0
    # (or "false"/"no") to turn it off without editing this file.
    debug_enabled = os.environ.get('FLASK_DEBUG', '1').strip().lower() not in ('0', 'false', 'no')
    app.run(debug=debug_enabled)