
Commit

flask done
talha1503 committed Jul 4, 2020
1 parent ba255f9 commit 44af6a1
Showing 6 changed files with 173,125 additions and 1 deletion.
146,373 changes: 146,373 additions & 0 deletions data.csv

Large diffs are not rendered by default.

130 changes: 129 additions & 1 deletion predictor.py
@@ -6,13 +6,22 @@
from torchtext.data import Field,BucketIterator,TabularDataset
import torchtext
from flask import Flask
from flask import request,jsonify,render_template
from nltk.tokenize import sent_tokenize,word_tokenize
from gensim.summarization.summarizer import summarize
from gensim.summarization import keywords
from torchtext import data
from torchtext import vocab
import pickle
import pandas as pd
import re
import numpy as np
from werkzeug.utils import secure_filename
import os
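
# NOTE: nltk's sent_tokenize/word_tokenize assume the 'punkt' tokenizer data has
# been downloaded (nltk.download('punkt')), and gensim.summarization only exists
# in gensim versions before 4.0.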

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] ='/home/talha/Desktop/fakeit/'

class Model(nn.Module):
    def __init__(self,num_layers,pad_index,batch_size,vocab_size,embedding_matrix,embedding_dimensions,hidden_size,bidirectional):
@@ -66,10 +75,129 @@ def predict(text):
return "True"
return output

def return_accuracy(logits,label):
    # Fraction of correct predictions: sigmoid -> round to 0/1, then compare with labels.
    sigmoid = nn.Sigmoid()(logits)
    predictions = torch.round(sigmoid)
    predictions = predictions.view(-1)  # flatten instead of hardcoding the batch size
    return (predictions == label).sum().float()/float(label.size(0))

def summarize_text(article):
    summarized_article = summarize(article,word_count = 50)
    return summarized_article

def clean_contractions(text, mapping):
    text = text.lower()
    specials = ["’", "‘", "´", "`"]
    for s in specials:
        text = text.replace(s, "'")
    text = ' '.join([mapping[t] if t in mapping else mapping[t.lower()] if t.lower() in mapping else t for t in text.split(" ")])
    return text
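
# Illustrative example (assuming the contraction_mapping defined in train() below):
#   clean_contractions("You’ll see it's colour", contraction_mapping)
#   -> "you will see it is color"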

def remove_newlines(sent):
    # Despite the name, this collapses any whitespace run (including newlines) into one space.
    sent = re.sub(r'\s+', " ", sent)
    return sent


def create_embedding_matrix(field,embeddings):
    embedding_matrix = np.random.rand(len(field.vocab.stoi),100)
    for string,index in field.vocab.stoi.items():
        if not all(x == 0 for x in embeddings[string].tolist()):
            embedding_matrix[index] = embeddings[string]
    return embedding_matrix
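
# Note: 'embeddings' here is assumed to be dict-like (token -> 100-dim vector,
# e.g. a torchtext Vectors object), matching the 100-dim matrix created above;
# tokens whose pretrained vector is all zeros keep their random initialization.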

@app.route('/form',methods=['POST','GET'])
def model_form():
    return render_template('form.html')


@app.route('/train',methods=['POST','GET'])
def train():
    learning_rate = float(request.form['learning_rate'])
    epochs = int(request.form['epochs'])
    csv_file = request.files['data']
    csv_path = os.path.join(app.config['UPLOAD_FOLDER'], 'data.csv')
    csv_file.save(csv_path)

    train_df = pd.read_csv(csv_path)  # read back the file that was just saved
    valid = train_df[15000:]
    train_df = train_df[:15000]  # keep the training and validation rows disjoint
    df_trn = pd.DataFrame({'text':train_df['title'].values, 'labels':train_df['label'].values})
    df_val = pd.DataFrame({'text':valid['title'].values, 'labels':valid['label'].values})
    df_trn = df_trn.sample(frac=1)
    df_val = df_val.sample(frac=1)
    contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not", "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not", "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is", "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would", "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have","i'm": "i am",'i\'m':'i am', "i've": "i have", "isn't": "is not", "it'd": "it would", "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock", "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is", "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as", "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would", "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have", "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have", "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are", "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is", "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have", "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have","you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have", "you're": "you are", "you've": "you have", 'colour': 'color', 'centre': 'center', 'favourite': 'favorite', 'travelling': 'traveling', 'counselling': 'counseling', 'theatre': 'theater', 'cancelled': 'canceled'}
    df_trn['text'] = df_trn['text'].apply(lambda x: clean_contractions(str(x),contraction_mapping))
    df_val['text'] = df_val['text'].apply(lambda x: clean_contractions(str(x),contraction_mapping))
    df_trn['text'] = df_trn['text'].apply(lambda x: remove_newlines(str(x)))
    df_val['text'] = df_val['text'].apply(lambda x: remove_newlines(str(x)))
    df_trn.to_csv('train.csv',index = False)
    df_val.to_csv('validation.csv',index = False)
    tokenizer = lambda s: s.lower().split()
    text1 = data.Field(tokenize=tokenizer,batch_first=True,include_lengths=True)
    label = data.Field(sequential=False, use_vocab=False, pad_token=None, unk_token=None)
    fields = [('text',text1),('labels',label)]
    train_data, valid_data = data.TabularDataset.splits(path='./',train='train.csv',validation = 'validation.csv',format='csv',fields=fields,skip_header=True)
    embeddings = pickle.load(open('./embeddings.pkl','rb'))
    # Build the text vocabulary once, attaching the pretrained vectors.
    # The label field has use_vocab=False, so it needs no vocabulary.
    text1.build_vocab(train_data,valid_data,vectors = embeddings)
    train_itr,valid_itr = data.BucketIterator.splits((train_data,valid_data),batch_size = 32,sort_key = lambda x: len(x.text),sort_within_batch = True,device = device)
    embedding = create_embedding_matrix(text1,embeddings)
    model = Model(pad_index = text1.vocab.stoi[text1.pad_token],
                  batch_size = 32,
                  vocab_size = len(text1.vocab),
                  embedding_matrix = embedding,
                  embedding_dimensions = 100,
                  hidden_size = 512,
                  bidirectional = False,
                  num_layers = 1
                  )
    model = model.to(device = device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(),lr = learning_rate)


    model.train()
    for epoch in range(epochs):
        print("Epoch {} out of {}".format(epoch + 1,epochs))

        epoch_train_loss = 0
        epoch_train_accuracy = 0

        for batch in train_itr:
            optimizer.zero_grad()

            text = batch.text[0]
            label = batch.labels

            if text.size()[0] != 32:
                continue  # skip the final partial batch; the model expects a fixed batch size

            text = text.to(device)  # .to() is not in-place, so assign the result back
            label = label.to(device = device, dtype = torch.float32)
            predictions = model(text)

            loss = criterion(torch.sigmoid(predictions),label.unsqueeze(1))

            loss.backward()
            optimizer.step()

            batch_accuracy = return_accuracy(predictions,label)

            epoch_train_loss += loss.item()
            epoch_train_accuracy += batch_accuracy.item()

        print("Epoch Train Accuracy: ",epoch_train_accuracy/len(train_itr))
        print("Epoch Train Loss: ",epoch_train_loss/len(train_itr))
    return render_template('train.html',accuracy=epoch_train_accuracy/len(train_itr),epoch=epochs)


@app.route('/get_prediction',methods=['POST','GET'])
def get_predictions():
    data = request.get_json()
15 changes: 15 additions & 0 deletions templates/form.html
@@ -0,0 +1,15 @@
<form action="http://localhost:5000/train" method="POST" enctype = "multipart/form-data">
<label for="data">Upload a dataset named data.csv:</label>
<br>
<input type="file"
id="data" name="data"
accept=".csv">
<br>
<label for="learning_rate">Learning Rate:</label><br>
<input type="text" id="learning_rate" name="learning_rate">
<br>
<label for="epochs">Epochs:</label><br>
<input type="text" id="epochs" name="epochs">
<br>
<input type="submit" value="Submit">
</form>
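
For reference, a minimal client-side sketch of exercising the /train endpoint without the form, assuming the app is running locally on port 5000 (the URL hardcoded in the form's action) and that the uploaded CSV has the 'title' and 'label' columns train() expects; the file path and hyperparameter values are illustrative:

import requests

# Field names mirror the form above: 'data' (file), 'learning_rate', 'epochs'.
with open('data.csv', 'rb') as f:
    response = requests.post(
        'http://localhost:5000/train',
        files={'data': ('data.csv', f, 'text/csv')},
        data={'learning_rate': '0.01', 'epochs': '2'},
    )
print(response.text)  # the rendered templates/train.html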
6 changes: 6 additions & 0 deletions templates/train.html
@@ -0,0 +1,6 @@
<html>
<div>
    Accuracy: {{accuracy}}<br>
    Epochs: {{epoch}}
</div>
</html>
