Skip to content

Commit 57fd814

Browse files
committed
All anomaly detection rules migrated to use a library function.
1 parent: cbf9ea0; commit: 57fd814

6 files changed

+81 -135 lines changed

home/rules/percentile/anomaly_differencing_percentile_moving_average.py

+21 -35
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import numpy as np
2020
import scipy
2121
import scipy.stats
22+
from lib.anomalies.anomaly_detection import detect_upper_bound_anomaly, detect_lower_bound_anomaly
2223

2324

2425
# rule specific parameters object, contains values received from the quality check threshold configuration
@@ -109,31 +110,23 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
109110
# using a 0-based calculation (scale from 0)
110111
upper_median_multiples_array = [(difference / differences_median_float - 1.0) for difference
111112
in differences_list if difference >= differences_median_float]
112-
upper_multiples = np.array(upper_median_multiples_array, dtype=float)
113-
upper_multiples_median = np.median(upper_multiples)
114-
upper_multiples_std = scipy.stats.tstd(upper_multiples)
113+
threshold_upper_multiple = detect_upper_bound_anomaly(values_above_median=upper_median_multiples_array,
114+
degrees_of_freedom=degrees_of_freedom, tail=tail)
115115

116-
if float(upper_multiples_std) == 0:
117-
threshold_upper = differences_median_float
118-
else:
119-
# Assumption: the historical data follows t-student distribution
120-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_multiples_median, scale=upper_multiples_std)
121-
threshold_upper_multiple = float(upper_readout_distribution.ppf(1 - tail))
116+
if threshold_upper_multiple is not None:
122117
threshold_upper = (threshold_upper_multiple + 1.0) * differences_median_float
118+
else:
119+
threshold_upper = rule_parameters.actual_value
123120

124121
lower_median_multiples_array = [(-1.0 / (difference / differences_median_float)) for difference
125122
in differences_list if difference <= differences_median_float if difference != 0]
126-
lower_multiples = np.array(lower_median_multiples_array, dtype=float)
127-
lower_multiples_median = np.median(lower_multiples)
128-
lower_multiples_std = scipy.stats.tstd(lower_multiples)
123+
threshold_lower_multiple = detect_lower_bound_anomaly(values_below_median=lower_median_multiples_array,
124+
degrees_of_freedom=degrees_of_freedom, tail=tail)
129125

130-
if float(lower_multiples_std) == 0:
131-
threshold_lower = differences_median_float
132-
else:
133-
# Assumption: the historical data follows t-student distribution
134-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_multiples_median, scale=lower_multiples_std)
135-
threshold_lower_multiple = float(lower_readout_distribution.ppf(tail))
126+
if threshold_lower_multiple is not None:
136127
threshold_lower = differences_median_float * (-1.0 / threshold_lower_multiple)
128+
else:
129+
threshold_lower = rule_parameters.actual_value
137130

138131
passed = threshold_lower <= actual_difference <= threshold_upper
139132

@@ -145,28 +138,21 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
145138
else:
146139
# using unrestricted method for both positive and negative values
147140
upper_half_filtered = [difference for difference in differences_list if difference >= differences_median_float]
148-
upper_half = np.array(upper_half_filtered, dtype=float)
149-
upper_half_median = np.median(upper_half)
150-
upper_half_std = scipy.stats.tstd(upper_half)
141+
threshold_upper_result = detect_upper_bound_anomaly(values_above_median=upper_half_filtered,
142+
degrees_of_freedom=degrees_of_freedom, tail=tail)
151143

152-
if float(upper_half_std) == 0:
153-
threshold_upper = differences_median_float
144+
if threshold_upper_result is not None:
145+
threshold_upper = threshold_upper_result
154146
else:
155-
# Assumption: the historical data follows t-student distribution
156-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_half_median, scale=upper_half_std)
157-
threshold_upper = float(upper_readout_distribution.ppf(1 - tail))
147+
threshold_upper = rule_parameters.actual_value
158148

159149
lower_half_list = [difference for difference in differences_list if difference <= differences_median_float]
160-
lower_half = np.array(lower_half_list, dtype=float)
161-
lower_half_median = np.median(lower_half)
162-
lower_half_std = scipy.stats.tstd(lower_half)
163-
164-
if float(lower_half_std) == 0:
165-
threshold_lower = differences_median_float
150+
threshold_lower_result = detect_lower_bound_anomaly(values_below_median=lower_half_list,
151+
degrees_of_freedom=degrees_of_freedom, tail=tail)
152+
if threshold_lower_result is not None:
153+
threshold_lower = threshold_lower_result
166154
else:
167-
# Assumption: the historical data follows t-student distribution
168-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_half_median, scale=lower_half_std)
169-
threshold_lower = float(lower_readout_distribution.ppf(tail))
155+
threshold_lower = rule_parameters.actual_value
170156

171157
passed = threshold_lower <= actual_difference <= threshold_upper
172158

home/rules/percentile/anomaly_differencing_percentile_moving_average_30_days.py

+21 -35
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import numpy as np
2020
import scipy
2121
import scipy.stats
22+
from lib.anomalies.anomaly_detection import detect_upper_bound_anomaly, detect_lower_bound_anomaly
2223

2324

2425
# rule specific parameters object, contains values received from the quality check threshold configuration
@@ -109,31 +110,23 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
109110
# using a 0-based calculation (scale from 0)
110111
upper_median_multiples_array = [(difference / differences_median_float - 1.0) for difference
111112
in differences_list if difference >= differences_median_float]
112-
upper_multiples = np.array(upper_median_multiples_array, dtype=float)
113-
upper_multiples_median = np.median(upper_multiples)
114-
upper_multiples_std = scipy.stats.tstd(upper_multiples)
113+
threshold_upper_multiple = detect_upper_bound_anomaly(values_above_median=upper_median_multiples_array,
114+
degrees_of_freedom=degrees_of_freedom, tail=tail)
115115

116-
if float(upper_multiples_std) == 0:
117-
threshold_upper = differences_median_float
118-
else:
119-
# Assumption: the historical data follows t-student distribution
120-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_multiples_median, scale=upper_multiples_std)
121-
threshold_upper_multiple = float(upper_readout_distribution.ppf(1 - tail))
116+
if threshold_upper_multiple is not None:
122117
threshold_upper = (threshold_upper_multiple + 1.0) * differences_median_float
118+
else:
119+
threshold_upper = rule_parameters.actual_value
123120

124121
lower_median_multiples_array = [(-1.0 / (difference / differences_median_float)) for difference
125122
in differences_list if difference <= differences_median_float if difference != 0]
126-
lower_multiples = np.array(lower_median_multiples_array, dtype=float)
127-
lower_multiples_median = np.median(lower_multiples)
128-
lower_multiples_std = scipy.stats.tstd(lower_multiples)
123+
threshold_lower_multiple = detect_lower_bound_anomaly(values_below_median=lower_median_multiples_array,
124+
degrees_of_freedom=degrees_of_freedom, tail=tail)
129125

130-
if float(lower_multiples_std) == 0:
131-
threshold_lower = differences_median_float
132-
else:
133-
# Assumption: the historical data follows t-student distribution
134-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_multiples_median, scale=lower_multiples_std)
135-
threshold_lower_multiple = float(lower_readout_distribution.ppf(tail))
126+
if threshold_lower_multiple is not None:
136127
threshold_lower = differences_median_float * (-1.0 / threshold_lower_multiple)
128+
else:
129+
threshold_lower = rule_parameters.actual_value
137130

138131
passed = threshold_lower <= actual_difference <= threshold_upper
139132

@@ -145,28 +138,21 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
145138
else:
146139
# using unrestricted method for both positive and negative values
147140
upper_half_filtered = [difference for difference in differences_list if difference >= differences_median_float]
148-
upper_half = np.array(upper_half_filtered, dtype=float)
149-
upper_half_median = np.median(upper_half)
150-
upper_half_std = scipy.stats.tstd(upper_half)
141+
threshold_upper_result = detect_upper_bound_anomaly(values_above_median=upper_half_filtered,
142+
degrees_of_freedom=degrees_of_freedom, tail=tail)
151143

152-
if float(upper_half_std) == 0:
153-
threshold_upper = differences_median_float
144+
if threshold_upper_result is not None:
145+
threshold_upper = threshold_upper_result
154146
else:
155-
# Assumption: the historical data follows t-student distribution
156-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_half_median, scale=upper_half_std)
157-
threshold_upper = float(upper_readout_distribution.ppf(1 - tail))
147+
threshold_upper = rule_parameters.actual_value
158148

159149
lower_half_list = [difference for difference in differences_list if difference <= differences_median_float]
160-
lower_half = np.array(lower_half_list, dtype=float)
161-
lower_half_median = np.median(lower_half)
162-
lower_half_std = scipy.stats.tstd(lower_half)
163-
164-
if float(lower_half_std) == 0:
165-
threshold_lower = differences_median_float
150+
threshold_lower_result = detect_lower_bound_anomaly(values_below_median=lower_half_list,
151+
degrees_of_freedom=degrees_of_freedom, tail=tail)
152+
if threshold_lower_result is not None:
153+
threshold_lower = threshold_lower_result
166154
else:
167-
# Assumption: the historical data follows t-student distribution
168-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_half_median, scale=lower_half_std)
169-
threshold_lower = float(lower_readout_distribution.ppf(tail))
155+
threshold_lower = rule_parameters.actual_value
170156

171157
passed = threshold_lower <= actual_difference <= threshold_upper
172158

home/rules/percentile/anomaly_partition_row_count.py

+11 -18
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import numpy as np
2020
import scipy
2121
import scipy.stats
22+
from lib.anomalies.anomaly_detection import detect_upper_bound_anomaly, detect_lower_bound_anomaly
2223

2324

2425
# rule specific parameters object, contains values received from the quality check threshold configuration
@@ -103,31 +104,23 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
103104

104105
upper_median_multiples_array = [(readout / filtered_median_float - 1.0) for readout in extracted
105106
if readout >= filtered_median_float]
106-
upper_multiples = np.array(upper_median_multiples_array, dtype=float)
107-
upper_multiples_median = np.median(upper_multiples)
108-
upper_multiples_std = scipy.stats.tstd(upper_multiples)
107+
threshold_upper_multiple = detect_upper_bound_anomaly(values_above_median=upper_median_multiples_array,
108+
degrees_of_freedom=degrees_of_freedom, tail=tail)
109109

110-
if float(upper_multiples_std) == 0:
111-
threshold_upper = filtered_median_float
112-
else:
113-
# Assumption: the historical data follows t-student distribution
114-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_multiples_median, scale=upper_multiples_std)
115-
threshold_upper_multiple = float(upper_readout_distribution.ppf(1 - tail))
110+
if threshold_upper_multiple is not None:
116111
threshold_upper = (threshold_upper_multiple + 1.0) * filtered_median_float
112+
else:
113+
threshold_upper = rule_parameters.actual_value
117114

118115
lower_median_multiples_array = [(-1.0 / (readout / filtered_median_float)) for readout in extracted
119116
if readout <= filtered_median_float if readout != 0]
120-
lower_multiples = np.array(lower_median_multiples_array, dtype=float)
121-
lower_multiples_median = np.median(lower_multiples)
122-
lower_multiples_std = scipy.stats.tstd(lower_multiples)
117+
threshold_lower_multiple = detect_lower_bound_anomaly(values_below_median=lower_median_multiples_array,
118+
degrees_of_freedom=degrees_of_freedom, tail=tail)
123119

124-
if float(lower_multiples_std) == 0:
125-
threshold_lower = filtered_median_float
126-
else:
127-
# Assumption: the historical data follows t-student distribution
128-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_multiples_median, scale=lower_multiples_std)
129-
threshold_lower_multiple = float(lower_readout_distribution.ppf(tail))
120+
if threshold_lower_multiple is not None:
130121
threshold_lower = filtered_median_float * (-1.0 / threshold_lower_multiple)
122+
else:
123+
threshold_lower = rule_parameters.actual_value
131124

132125
passed = threshold_lower <= rule_parameters.actual_value <= threshold_upper
133126

home/rules/percentile/anomaly_stationary_count_values.py

+11 -18
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import numpy as np
2020
import scipy
2121
import scipy.stats
22+
from lib.anomalies.anomaly_detection import detect_upper_bound_anomaly, detect_lower_bound_anomaly
2223

2324

2425
# rule specific parameters object, contains values received from the quality check threshold configuration
@@ -105,31 +106,23 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
105106
tail = rule_parameters.parameters.anomaly_percent / 100.0
106107

107108
upper_median_multiples_array = [(readout / filtered_median_float - 1.0) for readout in extracted if readout >= filtered_median_float]
108-
upper_multiples = np.array(upper_median_multiples_array, dtype=float)
109-
upper_multiples_median = np.median(upper_multiples)
110-
upper_multiples_std = scipy.stats.tstd(upper_multiples)
109+
threshold_upper_multiple = detect_upper_bound_anomaly(values_above_median=upper_median_multiples_array,
110+
degrees_of_freedom=degrees_of_freedom, tail=tail)
111111

112-
if float(upper_multiples_std) == 0:
113-
threshold_upper = filtered_median_float
114-
else:
115-
# Assumption: the historical data follows t-student distribution
116-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_multiples_median, scale=upper_multiples_std)
117-
threshold_upper_multiple = float(upper_readout_distribution.ppf(1 - tail))
112+
if threshold_upper_multiple is not None:
118113
threshold_upper = (threshold_upper_multiple + 1.0) * filtered_median_float
114+
else:
115+
threshold_upper = rule_parameters.actual_value
119116

120117
lower_median_multiples_array = [(-1.0 / (readout / filtered_median_float)) for readout in extracted
121118
if readout <= filtered_median_float if readout != 0]
122-
lower_multiples = np.array(lower_median_multiples_array, dtype=float)
123-
lower_multiples_median = np.median(lower_multiples)
124-
lower_multiples_std = scipy.stats.tstd(lower_multiples)
119+
threshold_lower_multiple = detect_lower_bound_anomaly(values_below_median=lower_median_multiples_array,
120+
degrees_of_freedom=degrees_of_freedom, tail=tail)
125121

126-
if float(lower_multiples_std) == 0:
127-
threshold_lower = filtered_median_float
128-
else:
129-
# Assumption: the historical data follows t-student distribution
130-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_multiples_median, scale=lower_multiples_std)
131-
threshold_lower_multiple = float(lower_readout_distribution.ppf(tail))
122+
if threshold_lower_multiple is not None:
132123
threshold_lower = filtered_median_float * (-1.0 / threshold_lower_multiple)
124+
else:
125+
threshold_lower = rule_parameters.actual_value
133126

134127
passed = threshold_lower <= rule_parameters.actual_value <= threshold_upper
135128

home/rules/percentile/anomaly_stationary_percent_values.py

+11 -20
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import numpy as np
2020
import scipy
2121
import scipy.stats
22+
from lib.anomalies.anomaly_detection import detect_upper_bound_anomaly, detect_lower_bound_anomaly
2223

2324

2425
# rule specific parameters object, contains values received from the quality check threshold configuration
@@ -111,36 +112,26 @@ def evaluate_rule(rule_parameters: RuleExecutionRunParameters) -> RuleExecutionR
111112
else:
112113
upper_median_multiples_array = [1.0 / (1.0 - readout / 100.0) for readout in extracted
113114
if readout >= filtered_median_float]
114-
upper_multiples = np.array(upper_median_multiples_array, dtype=float)
115-
upper_multiples_median = np.median(upper_multiples)
116-
upper_multiples_std = scipy.stats.tstd(upper_multiples)
115+
threshold_upper_multiple = detect_upper_bound_anomaly(values_above_median=upper_median_multiples_array,
116+
degrees_of_freedom=degrees_of_freedom, tail=tail)
117117

118-
if float(upper_multiples_std) == 0.0:
119-
threshold_upper = filtered_median_float
120-
else:
121-
# Assumption: the historical data follows t-student distribution
122-
upper_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=upper_multiples_median,
123-
scale=upper_multiples_std)
124-
threshold_upper_multiple = float(upper_readout_distribution.ppf(1 - tail))
118+
if threshold_upper_multiple is not None:
125119
threshold_upper = 100.0 - 100.0 * (1.0 / threshold_upper_multiple)
120+
else:
121+
threshold_upper = rule_parameters.actual_value
126122

127123
if 0.0 in all_extracted:
128124
threshold_lower = 0.0
129125
else:
130126
lower_median_multiples_array = [(-1.0 / (readout / filtered_median_float)) for readout in extracted
131127
if readout <= filtered_median_float]
132-
lower_multiples = np.array(lower_median_multiples_array, dtype=float)
133-
lower_multiples_median = np.median(lower_multiples)
134-
lower_multiples_std = scipy.stats.tstd(lower_multiples)
128+
threshold_lower_multiple = detect_lower_bound_anomaly(values_below_median=lower_median_multiples_array,
129+
degrees_of_freedom=degrees_of_freedom, tail=tail)
135130

136-
if float(lower_multiples_std) == 0.0:
137-
threshold_lower = filtered_median_float
138-
else:
139-
# Assumption: the historical data follows t-student distribution
140-
lower_readout_distribution = scipy.stats.t(df=degrees_of_freedom, loc=lower_multiples_median,
141-
scale=lower_multiples_std)
142-
threshold_lower_multiple = float(lower_readout_distribution.ppf(tail))
131+
if threshold_lower_multiple is not None:
143132
threshold_lower = filtered_median_float * (-1.0 / threshold_lower_multiple)
133+
else:
134+
threshold_lower = rule_parameters.actual_value
144135

145136
passed = threshold_lower <= rule_parameters.actual_value <= threshold_upper
146137

0 commit comments

Comments
 (0)