# KNN.py -- reconstructed from the "Initial commit" patch (79dc5d8).
"""k-nearest-neighbours classifier evaluated on a CSV data set.

The CSV is read with the class label in the first column and the features in
the remaining columns. Rows are shuffled, split 80/20 into train and test
sets, every test row is classified by majority vote among its nearest
training rows, and the accuracy (in percent) and a confusion matrix are
printed.
"""
from math import sqrt  # not used below; kept because the original module imported it
import pandas as pd
import numpy as np
from collections import Counter


def trainTestSplit(data, trainFraction=0.8):
    """Split *data* positionally into a leading train part and a trailing test part.

    Args:
        data: pandas DataFrame to split; rows are taken in their current order.
        trainFraction: share of rows (0..1) that goes to the training set.
            The default reproduces the original fixed 80/20 split.

    Returns:
        ``(train, test)`` DataFrames, each re-indexed from 0.

    Raises:
        ValueError: if *trainFraction* is outside ``[0, 1]``.
    """
    if not 0 <= trainFraction <= 1:
        raise ValueError(f"trainFraction must be within [0, 1], got {trainFraction!r}")

    splitIndex = int(trainFraction * data.shape[0])
    # reset_index so both parts can be addressed from position 0
    train = data.iloc[:splitIndex].reset_index(drop=True)
    test = data.iloc[splitIndex:].reset_index(drop=True)
    return train, test


def eucledianDistance(p1, p2):
    """Return the Euclidean distance between points *p1* and *p2*.

    Both arguments may be NumPy arrays or plain numeric sequences.
    """
    difference = np.asarray(p1) - np.asarray(p2)
    return np.sqrt(np.sum(difference ** 2))


def KNNPredict(featuresTrain, labelsTrain, featureTest, k=15):
    """Predict the label of one sample by majority vote of its nearest neighbours.

    Args:
        featuresTrain: sequence of training feature vectors.
        labelsTrain: labels, indexable in parallel with *featuresTrain*.
        featureTest: feature vector of the sample to classify.
        k: number of nearest neighbours that vote. Values larger than the
            training set simply use every training sample.

    Returns:
        The most frequent label among the *k* nearest training samples. On a
        tie, the label whose first vote comes from the closer neighbour wins.

    Raises:
        ValueError: if *k* is smaller than 1 or the training set is empty.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k!r}")
    if len(featuresTrain) == 0:
        raise ValueError("featuresTrain must contain at least one sample")

    distances = np.array(
        [eucledianDistance(featureTest, point) for point in featuresTrain]
    )
    # A stable sort keeps equally distant samples in training order, which
    # matches the tie-breaking of a stable list sort on (index, distance).
    nearest = np.argsort(distances, kind="stable")[:k]

    votes = Counter(labelsTrain[i] for i in nearest)
    return votes.most_common(1)[0][0]


def confusionMatrix(predictions, labels, classNum):
    """Count (predicted, actual) label pairs.

    Labels are expected to be the integers ``1..classNum``; entry
    ``[p - 1, a - 1]`` holds how often label ``p`` was predicted for a sample
    whose actual label is ``a`` (rows: predicted, columns: actual).

    Args:
        predictions: predicted labels.
        labels: actual labels, same length as *predictions*.
        classNum: number of classes.

    Returns:
        ``(classNum, classNum)`` array of dtype ``int32``.

    Raises:
        ValueError: if the inputs differ in length or contain a label outside
            ``1..classNum`` (a label of 0 would otherwise wrap around to the
            last row/column through negative indexing).
    """
    predicted = np.asarray(predictions)
    actual = np.asarray(labels)
    if len(predicted) != len(actual):
        raise ValueError(
            f"predictions and labels differ in length: {len(predicted)} != {len(actual)}"
        )

    mat = np.zeros((classNum, classNum), dtype=np.int32)
    if len(predicted) == 0:
        return mat

    for name, values in (("predictions", predicted), ("labels", actual)):
        if values.min() < 1 or values.max() > classNum:
            raise ValueError(f"{name} must only contain labels in 1..{classNum}")

    # np.add.at accumulates repeated index pairs, unlike mat[rows, cols] += 1
    np.add.at(mat, (predicted - 1, actual - 1), 1)
    return mat


def main(csvPath='wine.csv', k=15):
    """Run the train/test evaluation on *csvPath* and print the results."""
    # sample(frac=1) shuffles the rows so the positional split is random
    df = pd.read_csv(csvPath).sample(frac=1)

    classNum = len(set(df.iloc[:, 0]))
    trainData, testData = trainTestSplit(df)

    featuresTrain = trainData.iloc[:, 1:].values
    labelsTrain = trainData.iloc[:, 0].values

    featuresTest = testData.iloc[:, 1:].values
    labelsTest = testData.iloc[:, 0].values

    predictions = np.array(
        [KNNPredict(featuresTrain, labelsTrain, point, k) for point in featuresTest]
    )
    accuracy = np.sum(predictions == labelsTest) / len(labelsTest) * 100
    print(accuracy)
    print(confusionMatrix(predictions, labelsTest, classNum))


if __name__ == "__main__":
    main()