-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnaive-bayes.py
110 lines (78 loc) · 2.83 KB
/
naive-bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
def accuracy_score(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    Both inputs are converted to NumPy arrays and compared position by
    position, so plain lists, tuples, arrays and pandas Series (regardless
    of their index labels) are all accepted.

    Args:
        y_true: Sequence of ground-truth labels.
        y_pred: Sequence of predicted labels with the same shape as
            ``y_true``.

    Returns:
        The share of matching positions as a float percentage, rounded to
        two decimal places.

    Raises:
        ValueError: If the two inputs do not have the same shape.
        ZeroDivisionError: If the inputs are empty.
    """
    truth = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    if truth.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {truth.shape} and {predicted.shape}"
        )
    # Element-wise comparison on arrays; comparing two lists directly with
    # == would collapse to a single bool and break the count.
    matches = int(np.sum(truth == predicted))
    return round(matches / truth.size * 100, 2)
def pre_processing(df):
    """Split a data frame into predictors and target.

    The last column of ``df`` is treated as the target; every other column
    is a predictor.

    Args:
        df: pandas DataFrame whose final column holds the class labels.

    Returns:
        A ``(X, y)`` tuple where ``X`` is ``df`` without its last column and
        ``y`` is that last column.
    """
    target_column = df.columns[-1]
    predictors = df.drop(columns=[target_column])
    target = df[target_column]
    return predictors, target
class NaiveBayes:
    """Naive Bayes classifier for categorical features.

    ``fit`` builds three frequency tables from the training data and
    ``predict`` combines them with Bayes' rule:

    * ``class_priors[outcome]`` -- share of training rows with that outcome.
    * ``likelihoods[feature]["<value>_<outcome>"]`` -- share of the rows of
      ``outcome`` whose ``feature`` equals ``value``.
    * ``pred_priors[feature][value]`` -- share of all training rows whose
      ``feature`` equals ``value``.
    """

    def __init__(self):
        """Create an unfitted classifier with empty tables."""
        self.features = []
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}
        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    @staticmethod
    def _likelihood_key(feat_val, outcome):
        """Build the ``"<value>_<outcome>"`` key used in ``likelihoods``."""
        # An f-string gives the same key as string concatenation for string
        # inputs, and also works when the values are not strings.
        return f"{feat_val}_{outcome}"

    def fit(self, X, y):
        """Estimate priors and likelihoods from the training data.

        Args:
            X: pandas DataFrame of categorical predictors, one row per
                sample.
            y: Class labels, one per row of ``X`` and in the same row order.

        Raises:
            ValueError: If ``X`` and ``y`` have a different number of rows.
        """
        if len(y) != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {len(y)} labels"
            )

        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so nothing from an earlier fit survives.
        self.likelihoods = {}
        self.pred_priors = {}

        outcomes = np.unique(self.y_train)
        self.class_priors = {outcome: 0 for outcome in outcomes}

        # Pre-fill every (value, outcome) pair with 0 so combinations that
        # never occur in training still have an entry at predict time.
        for feature in self.features:
            feat_vals = np.unique(self.X_train[feature])
            self.pred_priors[feature] = {feat_val: 0 for feat_val in feat_vals}
            self.likelihoods[feature] = {
                self._likelihood_key(feat_val, outcome): 0
                for feat_val in feat_vals
                for outcome in outcomes
            }

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """Fill ``class_priors`` with the relative frequency of each outcome."""
        labels = np.asarray(self.y_train)
        for outcome in np.unique(labels):
            outcome_count = int(np.sum(labels == outcome))
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """Fill ``likelihoods`` with per-outcome frequencies of each value."""
        labels = np.asarray(self.y_train)
        for outcome in np.unique(labels):
            # Select rows by position, not by index label: label lookups
            # return extra rows when the index contains duplicates.
            in_outcome = labels == outcome
            outcome_count = int(in_outcome.sum())
            for feature in self.features:
                column = np.asarray(self.X_train[feature])
                feat_vals, counts = np.unique(
                    column[in_outcome], return_counts=True
                )
                table = self.likelihoods.setdefault(feature, {})
                for feat_val, count in zip(feat_vals, counts):
                    key = self._likelihood_key(feat_val, outcome)
                    table[key] = int(count) / outcome_count

    def _calc_predictor_prior(self):
        """Fill ``pred_priors`` with the overall frequency of each value."""
        for feature in self.features:
            table = self.pred_priors.setdefault(feature, {})
            value_counts = self.X_train[feature].value_counts()
            for feat_val, count in value_counts.items():
                table[feat_val] = count / self.train_size

    def predict(self, X):
        """Return the most probable outcome for each row of ``X``.

        Args:
            X: 2-D array-like of samples; the values of each row must be in
                the same order as the feature columns seen by ``fit``.

        Returns:
            A NumPy array with one predicted outcome per row. When several
            outcomes share the highest posterior, the first one in sorted
            order is returned.

        Raises:
            KeyError: If a row contains a feature value that did not occur
                in the training data.
        """
        queries = np.array(X)
        # The set of outcomes does not depend on the query; compute it once.
        outcomes = np.unique(self.y_train)

        results = []
        for query in queries:
            probs_outcome = {}
            for outcome in outcomes:
                likelihood = 1
                evidence = 1
                for feat, feat_val in zip(self.features, query):
                    try:
                        key = self._likelihood_key(feat_val, outcome)
                        likelihood *= self.likelihoods[feat][key]
                        evidence *= self.pred_priors[feat][feat_val]
                    except KeyError:
                        raise KeyError(
                            f"value {feat_val!r} of feature {feat!r} "
                            f"was not seen during fit"
                        ) from None
                prior = self.class_priors[outcome]
                probs_outcome[outcome] = (likelihood * prior) / evidence
            results.append(max(probs_outcome, key=probs_outcome.get))
        return np.array(results)
if __name__ == "__main__":
    # Read the tab-separated data set; its last column is the class label.
    df = pd.read_table("weather.txt")
    X, y = pre_processing(df)

    # Train on the full data set and report accuracy on the same rows.
    nb_clf = NaiveBayes()
    nb_clf.fit(X, y)
    print(f"Train Accuracy: {accuracy_score(y, nb_clf.predict(X))}")

    # Classify one hand-written sample, values in training column order.
    test = np.array([['Rainy','Mild', 'Normal', 't']])
    print(f"Test => {test} \nOutput: {nb_clf.predict(test)}")