-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
76 lines (63 loc) · 3.84 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import numpy as np
import os
import local_info
def load_activations(is_FFT):
    """Load the stored network activations for the train/valid/test splits.

    The activations are read from CSV files located under
    ``local_info.data_path``; keeping them on disk avoids retraining the
    network on every run.

    Args:
        is_FFT: When truthy, read the ``activations_<split>_FFT.csv`` files;
            otherwise read ``activations_<split>.csv``.

    Returns:
        A tuple ``(activations_train, activations_valid, activations_test)``
        of NumPy arrays, one per split.
    """
    print("Reading activations dataset...")
    # The suffix is upper-case on purpose: store_activations writes
    # 'activations_<split>_FFT.csv', and a lower-case '_fft' name is not found
    # on case-sensitive file systems.
    suffix = '_FFT' if is_FFT else ''
    activations_train, activations_valid, activations_test = (
        np.array(
            pd.read_csv(
                local_info.data_path + 'activations_' + split + suffix + '.csv'
            ).values
        )
        for split in ('train', 'valid', 'test')
    )
    if is_FFT:
        print("Activations (FFT) have been read from csv! (train shape is %s)" % str(np.shape(activations_train)))
    else:
        print("Activations have been read from csv! (train shape is %s)" % str(np.shape(activations_train)))
    return activations_train, activations_valid, activations_test
def store_activations(activ_train, activ_valid, activ_test, is_FFT):
    """Store the network activations as CSV files for later reuse.

    The files are written into ``local_info.data_path`` so the network does
    not have to be retrained each time the activations are needed.

    Args:
        activ_train: Activations for the training split (anything accepted by
            ``pd.DataFrame``).
        activ_valid: Activations for the validation split.
        activ_test: Activations for the test split.
        is_FFT: When truthy, the files are named
            ``activations_<split>_FFT.csv``; otherwise
            ``activations_<split>.csv``.
    """
    suffix = '_FFT' if is_FFT else ''
    splits = (('train', activ_train), ('valid', activ_valid), ('test', activ_test))
    for split, activations in splits:
        # Write to an explicit path instead of os.chdir()-ing into the data
        # directory: changing the working directory is a process-wide side
        # effect and breaks later calls when data_path is a relative path.
        target = os.path.join(
            local_info.data_path, 'activations_' + split + suffix + '.csv'
        )
        pd.DataFrame(activations).to_csv(target, encoding='utf-8', index=False)
    if is_FFT:
        print("Activations (FFT) have been stored! (train shape is %s)\n" % str(np.shape(activ_train)))
    else:
        print("Activations have been stored! (train shape is %s)\n" % str(np.shape(activ_train)))
def load_dataset(small_dataset, storing_small_dataset):
    """Read the raw train and test CSV files from ``local_info.data_path``.

    Args:
        small_dataset: When truthy, read the ``extract_train.csv`` /
            ``extract_test.csv`` debugging extracts instead of the full
            ``train.csv`` / ``test.csv`` files.
        storing_small_dataset: When truthy, additionally save the first 500
            rows of each loaded frame as a debugging extract via
            ``store_small_dataset``.

    Returns:
        A tuple ``(df_train, df_test)`` of pandas DataFrames.
    """
    print("Reading raw dataset...")
    if small_dataset:
        print("Small dataset chosen")
        train_file, test_file = 'extract_train.csv', 'extract_test.csv'
    else:
        print("Full dataset chosen")
        train_file, test_file = 'train.csv', 'test.csv'
    df_train = pd.read_csv(local_info.data_path + train_file)
    df_test = pd.read_csv(local_info.data_path + test_file)
    print("Done reading dataset! \n")

    # Optionally persist a 500-row extract of each frame for debugging runs.
    if storing_small_dataset:
        for frame, label in ((df_train, "train"), (df_test, "test")):
            store_small_dataset(frame, 500, label)
    return df_train, df_test
def normalize_dataframe(df, mean, var):
    """Return a copy of ``df`` shifted by ``mean`` and scaled by ``var``.

    Both operations are applied with ``axis=0``. ``var`` is used as the
    divisor exactly as given (no square root is taken here). The input frame
    is not modified.

    Args:
        df: DataFrame to normalize.
        mean: Value(s) subtracted from ``df``.
        var: Value(s) the shifted frame is divided by.

    Returns:
        A new DataFrame equal to ``(df - mean) / var``.
    """
    return df.sub(mean, axis=0).div(var, axis=0)
def store_small_dataset(df, length, name):
    """Save the first ``length`` rows of ``df`` as a small debugging extract.

    The extract is written to ``extract_<name>.csv`` inside
    ``local_info.data_path``.

    Args:
        df: DataFrame to take the extract from.
        length: Number of leading rows to keep.
        name: Label used in the file name and log messages (e.g. "train").
    """
    print("Storing extract of " + name + " data...")
    # Write to an explicit path instead of os.chdir()-ing into the data
    # directory: changing the working directory is a process-wide side effect,
    # and with a relative data_path a second call would fail to chdir again.
    target = os.path.join(local_info.data_path, 'extract_' + name + '.csv')
    df[0:length].to_csv(target, encoding='utf-8', index=False)
    location = os.path.realpath(local_info.data_path)
    print("Done storing %s extract at location %s ! \n" % (name, location))