Skip to content

Commit

Permalink
Clean the github and add some minor comments
Browse files Browse the repository at this point in the history
  • Loading branch information
RogersNtr committed Sep 10, 2018
1 parent cb734f6 commit 061ba40
Show file tree
Hide file tree
Showing 8 changed files with 279 additions and 251 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,8 @@ Hexapod_dataset
venv
.idea
__pycache__/
Page_Hinkley.py
Page_Hinkley.py
figure/
find_combination.py
input.csv
itineraries_results.csv
44 changes: 22 additions & 22 deletions Page_Hinkley.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,46 @@ def __init__(self, delta=0.0000005, lambda_=2, alpha=1 - 0.0001):
"""
:param delta: Magnitude of changes
:param lambda_: the threshold to detect drift
:param lambda_: detection threshold, chosen according to the admissible false alarm rate (FAR); a drift is signalled when the test statistic exceeds it.
:param alpha: the weight of a given element in the datastream (a.k.a fading factor)
"""
self.delta = delta
self.lambda_ = lambda_
self.alpha_ = alpha
self.sum = 0
# incrementally calculated mean of input data
self.x_item_mean = 0
# number of values in the data stream
self.num = 0
self.average = 0
self.x_item_mean = 0 # running mean of the input items, updated at each iteration
self.num_iter = 0 # number of iterations done so far
self.is_change_detected = False

def reset_parameters_(self):
"""
Reset the parameters, each time a drift has been detected
:return:
"""
self.num = 0
self.num_iter = 0
self.x_item_mean = 0
self.sum = 0
self.average = 0

def set_input(self, x):
def set_data(self, x_item):
"""
It helps to incrementally add a value to the PH-test and check for each value added, if there is a drift or not
:param x: value from the data stream
:return: boolean, isChangeDetected or not in the datastream
Incrementally add a value to the PH-test and check for drift as soon as the item is added.
:param x_item: value (instance) added from the data stream
:return: boolean, True if a change occurred in the data stream, False otherwise.
"""
self.detect_drift_(x)
self.detect_drift_(x_item)
return self.is_change_detected

def detect_drift_(self, x):
def detect_drift_(self, x_item):
"""
Concept drift detection following the formula from 'Knowledge Discovery from Data Streams' by João Gama (p. 76)
:param x: input data
Concept drift detection from 'Knowledge Discovery from Data Streams' by João Gama (p. 76)
:param x_item: input data
"""
# calculate the average and sum
self.num += 1
self.x_item_mean = (x + self.x_item_mean * (self.num - 1)) / self.num
self.num_iter += 1
self.x_item_mean = (x_item + self.x_item_mean * (self.num_iter - 1)) / self.num_iter
# self.x_mean = self.x_mean + (x + self.x_mean * (self.num - 1)) / self.num
self.sum = self.sum * self.alpha_ + x - self.x_item_mean - self.delta
self.average = self.average * self.alpha_ + x_item - self.x_item_mean - self.delta

if self.sum > self.lambda_:
if self.average > self.lambda_:
self.is_change_detected = True
else:
self.is_change_detected = False
Expand All @@ -55,5 +52,8 @@ def detect_drift_(self, x):
return self.is_change_detected

def point_of_drift(self):
return self.num
"""
:return: position of the drift (where the drift occurs)
"""
return self.num_iter

188 changes: 92 additions & 96 deletions drift_dataset_hexapod.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,6 @@ def kolmogorov_smirnov(data, window_size=100):
return drift


def page_hinkley(data):
return 0


def norm_(x, min_, max_):
    """
    Min-max scale ``x`` relative to the range ``[min_, max_]``.

    :param x: value(s) to scale
    :param min_: lower bound of the range (mapped to 0)
    :param max_: upper bound of the range (mapped to 1)
    :return: ``(x - min_) / (max_ - min_)``; when a scalar range has zero
             width, the offset ``x - min_`` is returned instead (unit scale)
    """
    span = max_ - min_
    # A zero-width scalar range would divide by zero, so fall back to a unit
    # scale. Array-valued bounds (anything exposing a length) are left to the
    # plain element-wise division.
    if not hasattr(span, "__len__") and span == 0:
        return x - min_
    return (x - min_) / span

Expand Down Expand Up @@ -382,34 +378,34 @@ def get_actual_data(i):

# Plot the current for each terrain

# for i in range(7):
# if i == 0:
# actual_current = current_bf_pd
# title = "current on Black Flat(" + str(i) + ") terrain"
# elif i == 1:
# actual_current = current_br_pd
# title = "current on Black Rough(" + str(i) + ") terrain"
# elif i == 2:
# actual_current = current_cu_pd
# title = "current on Cubes(" + str(i) + ") terrain"
# elif i == 3:
# actual_current = current_flat_pd
# title = "current on Flat(" + str(i) + ") terrain"
# elif i == 4:
# actual_current = current_gf_pd
# title = "current on Grass Flat(" + str(i) + ") terrain"
# elif i == 5:
# actual_current = current_gr_pd
# title = "current on Grass Rough(" + str(i) + ") terrain"
# else:
# print("something wrong happen when ploting each current terrain")
#
# plt.figure()
# plt.xlabel("time steps")
# plt.ylabel("current")
# plt.plot(actual_current['current'])
# plt.title(title)
# plt.savefig("figure/" + "Terrain{}".format(i) + ".png")
for i in range(7):
if i == 0:
actual_current = current_bf_pd
title = "current on Black Flat(" + str(i) + ") terrain"
elif i == 1:
actual_current = current_br_pd
title = "current on Black Rough(" + str(i) + ") terrain"
elif i == 2:
actual_current = current_cu_pd
title = "current on Cubes(" + str(i) + ") terrain"
elif i == 3:
actual_current = current_flat_pd
title = "current on Flat(" + str(i) + ") terrain"
elif i == 4:
actual_current = current_gf_pd
title = "current on Grass Flat(" + str(i) + ") terrain"
elif i == 5:
actual_current = current_gr_pd
title = "current on Grass Rough(" + str(i) + ") terrain"
else:
print("something wrong happen when ploting each current terrain")

plt.figure()
plt.xlabel("time steps")
plt.ylabel("current")
plt.plot(actual_current['current'])
plt.title(title)
plt.savefig("figure/" + "Terrain{}".format(i) + ".png")

# # # DP1 : Flat i.e {0, 3, 4}
start = datetime.now()
Expand All @@ -435,7 +431,7 @@ def get_actual_data(i):

# # # Run the drift detection over DPi, i = 1,...3
start = datetime.now()
delta_adwin = [0.001, 0.03, 0.6, 0.9] # Values of delta for ADWIN_V1 (confidence value)
delta_adwin = [0.001, 0.3, 2, 4] # Values of delta for ADWIN_V1 (confidence value)
min_len_win = [5, 10, 20, 32]
adwin = Adwin(delta=1)
delta_hinkley = [0.00005, 0.03, 0.6, 0.9] # Different delta for the PH test (magnitude of changes)
Expand All @@ -452,57 +448,57 @@ def get_actual_data(i):
############################################
# print(DP1[0:200])
print("######Size of the data {}".format(choose_size))
# for delta_i in delta_adwin:
# # adwin = Adwin(delta=delta_i, max_buckets=5, min_clock=5, min_length_window=5, min_length_sub_window=1)
# rand_nber = np.random.randint(0, 5)
# min_clock_val = adwin_min_clock[1]
# adwin = Adwin(delta=delta_i, max_buckets=5, min_clock=min_clock_val, min_length_window=5, min_length_sub_window=1)
# true_drift = 0
# false_drift = 0
# # actual_data = DP1
# print("#######################################################Result for delta = {} and min_clock = {}".format(delta_i, min_clock_val))
# for i in range(1, 6):
# actual_data, title_curve = get_actual_data(i)
#
# # print("ADWIN_V1, true positive : {}".format(true_drift))
# # # # # ------> ADWIN_V1
# # print("\n\n")
# print("\nADWIN start for {}.....".format(title_curve))
# index_drift = 0
# actual_data = actual_data['current']
# for dat in actual_data:
# index_drift += 1
# if adwin.set_input(dat):
# print("ADWIN_V1: drift at {}".format(index_drift))
# if i == 1:
# if (150<= index_drift <=250) or (375<= index_drift <=460):
# true_drift+=1
# else:
# false_drift+=1
# elif i == 2:
# if (150<= index_drift <=260) or (375<= index_drift <=420):
# true_drift+=1
# else:
# false_drift+=1
# elif i == 3:
# if 150<= index_drift <=250:
# true_drift+=1
# else:
# false_drift+=1
# elif i == 4:
# if 150 <= index_drift <=250:
# true_drift+=1
# else:
# false_drift+=1
# elif i == 5:
# false_drift+=1
# elif i == 5:
# true_drift+=1
# print("ADWIN_V1 : True Positive {}, False Positive {}".format(true_drift, false_drift))
# true_drift = 0
# false_drift = 0
# # else:
# # print("pas de drift")
for delta_i in delta_adwin:
# adwin = Adwin(delta=delta_i, max_buckets=5, min_clock=5, min_length_window=5, min_length_sub_window=1)
rand_nber = np.random.randint(0, 5)
min_clock_val = adwin_min_clock[1]
adwin = Adwin(delta=delta_i, max_buckets=5, min_clock=min_clock_val, min_length_window=5, min_length_sub_window=1)
true_drift = 0
false_drift = 0
# actual_data = DP1
print("#######################################################Result for delta = {} and min_clock = {}".format(delta_i, min_clock_val))
for i in range(1, 6):
actual_data, title_curve = get_actual_data(i)

# print("ADWIN_V1, true positive : {}".format(true_drift))
# # # # ------> ADWIN_V1
# print("\n\n")
print("\nADWIN start for {}.....".format(title_curve))
index_drift = 0
actual_data = actual_data['current']
for dat in actual_data:
index_drift += 1
if adwin.set_input(dat):
print("ADWIN_V1: drift at {}".format(index_drift))
if i == 1:
if (150<= index_drift <=250) or (375<= index_drift <=460):
true_drift+=1
else:
false_drift+=1
elif i == 2:
if (150<= index_drift <=260) or (375<= index_drift <=420):
true_drift+=1
else:
false_drift+=1
elif i == 3:
if 150<= index_drift <=250:
true_drift+=1
else:
false_drift+=1
elif i == 4:
if 150 <= index_drift <=250:
true_drift+=1
else:
false_drift+=1
elif i == 5:
false_drift+=1
elif i == 5:
true_drift+=1
print("ADWIN_V1 : True Positive {}, False Positive {}".format(true_drift, false_drift))
true_drift = 0
false_drift = 0
# else:
# print("pas de drift")

############################################
# Page-Hinkley Test results #
Expand All @@ -526,7 +522,7 @@ def get_actual_data(i):
# PH_2 = PH_test(actual_data, delta_=delta_test_ph, lambda_=lambda_i, alpha_=1 - 0.0001)
for dat1 in actual_data:
index_drift += 1
if PH_.set_input(dat1):
if PH_.set_data(dat1):
print("Page Hinkley: drift at {}".format(index_drift))
if i == 1:
if (150 <= index_drift <= 250) or (375 <= index_drift <= 460):
Expand Down Expand Up @@ -562,17 +558,17 @@ def get_actual_data(i):
# K-S Test results #
############################################
print("\n######-------------> KS test <-----------#########\n")
# for win_i in ks_window_sizes:
# rand_delta_hinkley = np.random.randint(0, 4) # A random number to choose a value of delta for the PH-test
# true_drift = 0
# false_drift = 0
# print("#######################################################Result for window_size = {}".format(win_i))
# for i in range(1, 6):
# actual_data, title_curve = get_actual_data(i)
# print("\nKS-test start for {}.....".format(title_curve))
# index_drift = 0
# kolmogorov_smirnov(actual_data, window_size=win_i)
#
for win_i in ks_window_sizes:
rand_delta_hinkley = np.random.randint(0, 4) # A random number to choose a value of delta for the PH-test
true_drift = 0
false_drift = 0
print("#######################################################Result for window_size = {}".format(win_i))
for i in range(1, 6):
actual_data, title_curve = get_actual_data(i)
print("\nKS-test start for {}.....".format(title_curve))
index_drift = 0
kolmogorov_smirnov(actual_data, window_size=win_i)

# for win in ks_window_sizes:
# # plotting the concatenated current
# # plotting the concatenations
Expand Down
Loading

0 comments on commit 061ba40

Please sign in to comment.