# utils.py
import pickle
import gzip
import math

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

def plot_images(X):
    """Plot each row of X as a 28x28 greyscale image, arranged in a near-square grid."""
    if X.ndim == 1:
        # Promote a single flattened image to a 1-row matrix
        X = np.array([X])
    num_images = X.shape[0]
    num_rows = math.floor(math.sqrt(num_images))
    num_cols = math.ceil(num_images / num_rows)
    for i in range(num_images):
        reshaped_image = X[i, :].reshape(28, 28)
        plt.subplot(num_rows, num_cols, i + 1)
        plt.imshow(reshaped_image, cmap=cm.Greys_r)
        plt.axis('off')
    plt.show()

def pick_examples_of(X, Y, labels, total_count):
    """Return up to total_count examples from X whose labels in Y belong to `labels`."""
    bool_arr = None
    for label in labels:
        # Build a boolean mask that is True wherever Y matches any requested label
        bool_arr_for_label = (Y == label)
        if bool_arr is None:
            bool_arr = bool_arr_for_label
        else:
            bool_arr |= bool_arr_for_label
    filtered_x = X[bool_arr]
    filtered_y = Y[bool_arr]
    return (filtered_x[:total_count], filtered_y[:total_count])

def extract_training_and_test_examples_with_labels(train_x, train_y, test_x, test_y, labels, training_count, test_count):
    filtered_train_x, filtered_train_y = pick_examples_of(train_x, train_y, labels, training_count)
    filtered_test_x, filtered_test_y = pick_examples_of(test_x, test_y, labels, test_count)
    return (filtered_train_x, filtered_train_y, filtered_test_x, filtered_test_y)

def write_pickle_data(data, file_name):
    """Pickle `data` and write it to a gzip-compressed file."""
    with gzip.open(file_name, 'wb') as f:
        pickle.dump(data, f)

def read_pickle_data(file_name):
    """Read and unpickle gzip-compressed data from a file."""
    with gzip.open(file_name, 'rb') as f:
        data = pickle.load(f, encoding='latin1')
    return data

def get_MNIST_data():
    """
    Reads the MNIST dataset from file.

    Returns:
        train_x - 2D Numpy array (n, d) where each row is an image
        train_y - 1D Numpy array (n, ) where each element is a label
        test_x  - 2D Numpy array (n, d) where each row is an image
        test_y  - 1D Numpy array (n, ) where each element is a label
    """
    train_set, valid_set, test_set = read_pickle_data('../Datasets/mnist.pkl.gz')
    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    # Fold the validation split into the training set
    train_x = np.vstack((train_x, valid_x))
    train_y = np.append(train_y, valid_y)
    test_x, test_y = test_set
    return (train_x, train_y, test_x, test_y)

def load_train_and_test_pickle(file_name):
    """Load a pickled (train_x, train_y, test_x, test_y) tuple from a gzip file."""
    train_x, train_y, test_x, test_y = read_pickle_data(file_name)
    return train_x, train_y, test_x, test_y

def load_CSV(filename):
    """Returns the feature set from a comma-separated file as a numpy ndarray."""
    stuff = np.asarray(np.loadtxt(open(filename, 'rb'), delimiter=','))
    return stuff
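
# Illustrative usage sketch: assumes the MNIST pickle is available at
# '../Datasets/mnist.pkl.gz' (the path expected by get_MNIST_data above).
# The label set and example counts below are arbitrary choices for demonstration.
if __name__ == '__main__':
    train_x, train_y, test_x, test_y = get_MNIST_data()
    # Keep only digits 0 and 1, limited to a small number of examples
    sub_train_x, sub_train_y, sub_test_x, sub_test_y = \
        extract_training_and_test_examples_with_labels(
            train_x, train_y, test_x, test_y,
            labels=[0, 1], training_count=20, test_count=10)
    # Display the first nine filtered training images in a 3x3 grid
    plot_images(sub_train_x[:9])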