improve code readability

Pantzan · Feb 17, 2019 · 2861fea · 2861fea
1 parent 6274368
commit 2861fea
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 19 deletions.
diff --git a/Ncv.py b/Ncv.py
@@ -1,27 +1,29 @@
 from PythonKnn import PythonKnn
 
 def NestedCrossVal(X,y,foldK,nns,dists,mySeed):
+
     np.random.seed(mySeed)
-    accuracy_fold=[]   
+    accuracy_fold = []   
     indices = np.random.permutation(X.shape[0])
-    bins=np.array_split(indices, foldK)
+    bins = np.array_split(indices, foldK)
     sum_matrix = []
-    assert(foldK==len(bins))
+    assert(foldK == len(bins))
+
     for i in range(0,foldK):
         foldTrain=[] 
         foldTest=[]
         foldVal=[]
-
         # split the X to train, test and validation folds
         foldTest.extend(bins[i])
+
         if(i != foldK-1):
             foldVal.extend(bins[i+1])
         else:
             foldVal.extend(bins[0])  
 
         # don't add duplicates
         for j in range(0,len(bins)):
-            if(i != j and i+1 != j):
+            if(i != j and i + 1 != j):
                 if(i == foldK-1 and j == 0):                   
                     continue
                 foldTrain.extend(bins[j])
@@ -35,7 +37,8 @@ def NestedCrossVal(X,y,foldK,nns,dists,mySeed):
         bestAccuracy=-10
 
         # loop over all available parameters and find the best ones to train the model
-        for d in range(0, len(dists)):           
+        for d in range(0, len(dists)):  
+
             for k in nns:
                 knn = PythonKnn(k, dists[d])
                 knn.fit(X[foldTrain], y[foldTrain])              

diff --git a/PythonKnn.py b/PythonKnn.py
@@ -1,7 +1,12 @@
 import numpy as np
-class PythonKnn:
+import math
+import operator
+from collections import Counter
+
+class PythonKnn(object):
     # initialize the object and its variables
     def __init__(self, n, metric):
+
         self.predicted = []
         self.X = None
         self.y = None
@@ -10,46 +15,51 @@ def __init__(self, n, metric):
 
     #Fit function to get X and y from IRIS dataset.
     def fit(self, Xx_, yy_):
+
         if(len(Xx_) != len(yy_)):
             raise ValueError("Data not equal in size with labels")
         self.X = Xx_
         self.y = yy_
 
     # Get the accuracy of the model and raise errors if the array sizes are wrong
     def accuracy(self, y_test,y_pred):
+
         if(len(y_test) != len(y_pred)):
             raise ValueError("Prediction and labels are not equal in size")
         a = np.where(y_test == y_pred)
         accuracy =  (float(len(a[0])) / len(y_test))
+
         return accuracy
 
     # Get the Euclidean distance between two arrays by zipping them, the Pythonic way
     def euclideanDistance(self, in1,in2):
-        import math
+
         return math.sqrt(sum([(a - b) ** 2 for a, b in zip(in1, in2)]))
 
     # Get the absolute distance from two arrays for experiment purposes
     def absoluteDistance(self, in1, in2):
+
         return float(abs(sum([(a - b) for a, b in zip(in1, in2)])))
 
     # check the distance from all points
     def getNeighbours(self, x_):
+
         dists = []
         X = self.X
         metric = self.metric
 
         for i in range(0, X.shape[0]-1):
+
             if metric == 'euclidean':
                 e = (( i, self.euclideanDistance(X[i], x_) ))
             elif metric == 'absolute':
                 e = (( i, self.absoluteDistance(X[i], x_) ))
             else:
                 raise ValueError("I don't understand this metric. Please choose 'euclidean' or 'absolute'" )
-#             print(X[i], x_, e[1])
             dists.append(e)
 
         # sort the tuple by the distance index
-        import operator
+
         s = dists.copy()
         s.sort(key=operator.itemgetter(1))
         # get the n neighbours
@@ -59,23 +69,25 @@ def getNeighbours(self, x_):
 
         # append to the array the calculated label
         self.assignLabel(label)
+
         return label 
 
     # get the labels from n neighbours
     def getLabel(self, neighbours):
-        from collections import Counter
+
         indexes = []
         a_labels = []
         labels = []
-
         # check the distance of the first item in the array of neighbours.
         # If the distance is zero, we hit the jackpot and return this label as a 100% correct prediction
         first = neighbours[0]
         if (first[1]) == 0:
+
             label = (self.y[first[0]],0)
         # get the majority of the labels and return it as the calculated label   
         else:        
             for i in range(0, len(neighbours)):
+
                 index = neighbours[i][0]    
                 a_labels.append(self.y[index])    
 
@@ -86,23 +98,29 @@ def getLabel(self, neighbours):
 
     # append to predicted array the correct label for each index of x test
     def assignLabel(self, label):
+
         self.predicted.append(label)
+
         return self.predicted 
 
     # the main function. Calls all the helper Knn functions and returns the predicted array
     # It also does checks about empty or not identical in size arrays
     def predict(self, x_): 
+
         if (len(x_) > len(self.X)):
             raise ValueError("Test array is larger than the training data")
         elif (len(x_) == 0):
             raise ValueError("Test array is empty")
         for i in x_: 
             self.getNeighbours(i)
+
         return self.predicted
 
     # calculates the confusion matrix according to the prediction.
     def confMat(self, y_test_,y_pred_,classno):
+
         matrix = np.zeros(9).reshape(classno,classno)
         for i in range(0,y_test_.size):
-            matrix[y_test_[i], y_pred_[i]] += 1    
+            matrix[y_test_[i], y_pred_[i]] += 1  
+
         return matrix
diff --git a/TestKnn.py b/TestKnn.py
@@ -1,7 +1,8 @@
 from PythonKnn import PythonKnn
 # Load the IRIS dataset, as in the labs
-# %matplotlib inline
 
+# uncomment this if you use jupyter
+# %matplotlib inline
 
 from sklearn import datasets
 import numpy as np
@@ -18,7 +19,6 @@
 
 
 iris = datasets.load_iris()
-
 #view a description of the dataset (uncomment next line to do so)
 #print(iris.DESCR)
 
@@ -40,10 +40,12 @@
 bins=np.array_split(indices,2) # we  just need a training and testing set here
 foldTrain=bins[0]
 foldTest=bins[1]
-knn=PythonKnn(10,'euclidean')
-knn.fit(X[foldTrain],y[foldTrain])
+
+knn=PythonKnn(10, 'euclidean')
+knn.fit(X[foldTrain], y[foldTrain])
 y_pred=knn.predict(X[foldTest])
+
 a = np.where(y_pred != y[foldTest])
-print(knn.accuracy(y[foldTest],y_pred))
-# print(knn.confMat())
+print("accuracy: ", knn.accuracy(y[foldTest],y_pred))
+
 # print(knn.confMat(y[foldTest],y_pred,len(np.unique(y))))