-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprediction_system.py
45 lines (36 loc) · 1.44 KB
/
prediction_system.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import utils
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
class PredictionModel:
    """Random-forest classifier over the trial promotion results.

    Constructing an instance loads the training data and fits the model
    immediately. Afterwards ``predict`` can be called with one row or many
    rows that contain (at least) the feature columns seen during training.

    Attributes set during training:
        columns: pandas Index of the feature columns, in training order.
        model: the fitted ``RandomForestClassifier``.
    """

    # Location of the training data handed to ``utils.load_data``.
    DEFAULT_DATA_PATH = "data/trialPromoResults.csv"

    # Columns of the training frame that are not features.
    NON_FEATURE_COLUMNS = ("index", "decision")

    # Per-class sample counts requested from the over-/under-samplers.
    OVERSAMPLE_TARGETS = {"A": 150, "B": 100}
    UNDERSAMPLE_TARGETS = {"None": 250}

    def __init__(self, data_path=DEFAULT_DATA_PATH):
        """Load the training data from *data_path* and fit the classifier."""
        df = self.load_data(data_path)
        self.model = self.train_model(df)

    @classmethod
    def _feature_columns(cls, df):
        """Return the feature column names of *df* in their original order.

        The previous set-difference selection produced an order that could
        change from run to run (string hashing is randomised), which made the
        trained model's feature layout non-reproducible.
        """
        return [name for name in df.columns
                if name not in cls.NON_FEATURE_COLUMNS]

    @staticmethod
    def _resample(sampler_cls, targets, X, y):
        """Resample ``(X, y)`` with *sampler_cls* towards the *targets* counts.

        imbalanced-learn renamed the ``ratio`` argument to
        ``sampling_strategy`` and ``fit_sample`` to ``fit_resample``; the old
        spellings were later removed. Prefer the new names and fall back to
        the old ones so the class works with either generation of the
        library.
        """
        try:
            sampler = sampler_cls(sampling_strategy=dict(targets))
        except TypeError:
            # Older releases only know the ``ratio`` keyword.
            sampler = sampler_cls(ratio=dict(targets))
        resample = getattr(sampler, "fit_resample", None)
        if resample is None:
            resample = sampler.fit_sample
        return resample(X, y)

    def train_model(self, df):
        """Fit and return a random forest on the class-rebalanced data.

        Args:
            df: training frame holding a ``decision`` label column plus the
                feature columns; an ``index`` column, if present, is ignored.

        Returns:
            The fitted ``RandomForestClassifier``.

        Side effects:
            Stores the feature columns on ``self.columns`` so ``predict`` can
            select them in the same order.
        """
        print("Training classifier.")
        X = df[self._feature_columns(df)]
        y = df["decision"]
        self.columns = X.columns

        # Grow the "A"/"B" classes first, then shrink the "None" class.
        X_resampled, y_resampled = self._resample(
            RandomOverSampler, self.OVERSAMPLE_TARGETS, X, y)
        X_resampled, y_resampled = self._resample(
            RandomUnderSampler, self.UNDERSAMPLE_TARGETS,
            X_resampled, y_resampled)

        model = RandomForestClassifier(n_estimators=500, max_features=None)
        model.fit(X_resampled, y_resampled)
        print("Training complete.")
        return model

    def _to_feature_matrix(self, row):
        """Select the training features from *row* as 2-D model input.

        A single observation, whether given as a Series or as a one-row
        DataFrame, is returned as a ``(1, n_features)`` array. A DataFrame
        with several rows is returned as a DataFrame restricted to the
        feature columns.
        """
        selected = row[self.columns]
        # A Series is 1-D with len == n_features, so testing only
        # ``len(...) == 1`` let multi-feature Series through un-reshaped.
        if selected.ndim == 1 or len(selected) == 1:
            return selected.values.reshape(1, -1)
        return selected

    def predict(self, row):
        """Predict the decision label(s) for *row*.

        Args:
            row: a pandas Series (one observation) or DataFrame (one or more
                observations) containing every column in ``self.columns``;
                extra columns are ignored.

        Returns:
            Whatever ``self.model.predict`` returns for the prepared input,
            one prediction per observation.
        """
        return self.model.predict(self._to_feature_matrix(row))

    def load_data(self, path=None):
        """Load the training data via ``utils.load_data``.

        Args:
            path: file to load; defaults to ``DEFAULT_DATA_PATH``.

        Returns:
            The object returned by ``utils.load_data`` -- presumably a pandas
            DataFrame (``train_model`` indexes it as one); verify in utils.
        """
        if path is None:
            path = self.DEFAULT_DATA_PATH
        df = utils.load_data(path)
        return df