-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsample_creator_unit_auto.py
138 lines (131 loc) · 4.87 KB
/
sample_creator_unit_auto.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
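'''
Sample creator for N-CMAPSS: generates windowed samples, one engine unit
at a time, from the training or the test units of the dataset
(presumably consumed afterwards by the DL models: FNN, 1D CNN and CNN-LSTM).
'''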
# Import libraries in python
import gc
import argparse
import os
# import json
# import logging
# import sys
# import h5py
# import time
# import matplotlib
import numpy as np
# import pandas as pd
# from pandas import DataFrame
# import matplotlib.pyplot as plt
# from matplotlib import gridspec
# import math
import random
# import importlib
# from scipy.stats import randint, expon, uniform
# import sklearn as sk
# from sklearn import svm
# from sklearn.utils import shuffle
# from sklearn import metrics
# from sklearn import preprocessing
# from sklearn import pipeline
# from sklearn.metrics import mean_squared_error
# from math import sqrt
# import scipy.stats as stats
from utils.data_preparation_unit import df_all_creator, \
df_train_creator, \
df_test_creator, Input_Gen
# Dataset locations, resolved relative to the directory holding this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
data_filedir = os.path.join(current_dir, 'Data/N-CMAPSS')
data_filepath = os.path.join(data_filedir, 'N-CMAPSS_DS02-006.h5')

# Seed both the stdlib and the NumPy generators with the same fixed value.
seed = 0
random.seed(seed)
np.random.seed(seed)
def main():
    """Generate windowed samples for the N-CMAPSS training or test units.

    Command-line arguments:
        -w          window length (required).
        -s          stride of the window (default 10).
        --sampling  sub-sampling factor applied to the given data (default 1).
        --test      0 extracts samples from the training units; any other
                    value (default 1) extracts them from the test units.

    The function loads the dataset through ``df_all_creator``, splits it into
    train and test dataframes using fixed unit lists, makes sure the
    ``Samples_whole`` directory exists under the data directory, and then
    calls ``Input_Gen(...).seq_gen()`` once per selected unit.
    """
    parser = argparse.ArgumentParser(description='sample creator')
    # '-w' is mandatory, so no default is declared: argparse would never
    # fall back to it.
    parser.add_argument('-w', type=int, required=True,
                        help='window length')
    parser.add_argument('-s', type=int, default=10,
                        help='stride of window')
    parser.add_argument('--sampling', type=int, default=1,
                        help='sub sampling of the given data. If it is 10, '
                             'then this indicates that we assumes 0.1Hz '
                             'of data collection')
    # The default has to be an int: argparse passes string defaults through
    # ``type``, so a non-numeric string default makes the parser reject every
    # invocation that omits --test. Any non-zero value selects the test units.
    parser.add_argument('--test', type=int, default=1,
                        help='select train or test, if it is zero, then '
                             'extract samples from the engines used '
                             'for training')
    args = parser.parse_args()

    sequence_length = args.w
    stride = args.s
    sampling = args.sampling
    selector = args.test

    # Load data. Dataset groups (as described by the original author):
    #   W: operative conditions (Scenario descriptors)
    #   X_s: measured signals
    #   X_v: virtual sensors
    #   T(theta): engine health parameters
    #   Y: RUL [in cycles]
    #   A: auxiliary data
    df_all = df_all_creator(data_filepath, sampling)

    # Split dataframe into Train and Test.
    #   Training units: 2, 5, 10, 16, 18, 20
    #   Test units: 11, 14, 15
    units_index_train = [2.0, 5.0, 10.0, 16.0, 18.0, 20.0]
    units_index_test = [11.0, 14.0, 15.0]
    print("units_index_train", units_index_train)
    print("units_index_test", units_index_test)

    df_train = df_train_creator(df_all, units_index_train)
    print(df_train)
    print(df_train.columns)
    print("num of inputs: ", len(df_train.columns))

    df_test = df_test_creator(df_all, units_index_test)
    print(df_test)
    print(df_test.columns)
    print("num of inputs: ", len(df_test.columns))

    # The full dataframe is no longer needed once both splits exist; drop it
    # before sample generation to keep peak memory down.
    del df_all
    gc.collect()

    sample_dir_path = os.path.join(data_filedir, 'Samples_whole')
    if not os.path.isdir(sample_dir_path):
        os.makedirs(sample_dir_path)
        print("created folder : ", sample_dir_path)

    # Every column except the target ('RUL') and the unit id is both
    # normalized and used as a sequence feature.
    cols_normalize = df_train.columns.difference(['RUL', 'unit'])
    sequence_cols = df_train.columns.difference(['RUL', 'unit'])

    # Both modes run the same generation; only the unit list differs.
    if selector == 0:
        selected_units = units_index_train
    else:
        selected_units = units_index_test

    for unit_index in selected_units:
        data_class = Input_Gen(df_train, df_test,
                               cols_normalize,
                               sequence_length,
                               sequence_cols,
                               sample_dir_path,
                               unit_index,
                               sampling,
                               stride=stride)
        data_class.seq_gen()
# Run the sample creation only when executed as a script, not on import.
if __name__ == "__main__":
    main()