Skip to content

Commit d3f8660

Browse files
committed
Backend supports recalibrating checks that caused a data quality incident.
1 parent 6ee2ef5 commit d3f8660

File tree

140 files changed

+2971
-32
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+2971
-32
lines changed

dqops/src/main/java/com/dqops/checks/AbstractCheckSpec.java

+44
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import com.dqops.checks.comparison.AbstractComparisonCheckCategorySpecMap;
1919
import com.dqops.core.secrets.SecretValueProvider;
20+
import com.dqops.data.checkresults.normalization.CheckResultsNormalizedResult;
2021
import com.dqops.metadata.basespecs.AbstractSpec;
2122
import com.dqops.metadata.comments.CommentsListSpec;
2223
import com.dqops.metadata.id.ChildHierarchyNodeFieldMapImpl;
@@ -36,6 +37,7 @@
3637
import com.google.common.base.Strings;
3738
import lombok.EqualsAndHashCode;
3839
import lombok.ToString;
40+
import tech.tablesaw.api.IntColumn;
3941

4042
import java.util.Objects;
4143

@@ -456,4 +458,46 @@ public boolean isStandard() {
456458
public boolean isDefault() {
457459
return false; // we serialize all checks, even when they have no parameters (because they are too simple to have parameters) and have no alert thresholds (because they are only capturing values)
458460
}
461+
462+
/**
463+
* Checks if any rules (warning, error, fatal) are configured.
464+
* @return True when any severity rule is configured, false otherwise.
465+
*/
466+
public boolean hasAnyRulesEnabled() {
467+
return this.getWarning() != null || this.getError() != null || this.getFatal() != null;
468+
}
469+
470+
/**
471+
* Changes the rule parameters to decrease rule severity and generate less alerts.
472+
* @param checkResultsSingleCheck History of check results for this check for the time period used for analysis.
473+
*/
474+
public void decreaseCheckSensitivity(CheckResultsNormalizedResult checkResultsSingleCheck) {
475+
if (checkResultsSingleCheck.isEmpty()) {
476+
return;
477+
}
478+
479+
if (checkResultsSingleCheck.getActualValueColumn().isNotMissing().isEmpty()) {
480+
return; // no results, most calculations will fail
481+
}
482+
483+
IntColumn severityColumn = checkResultsSingleCheck.getSeverityColumn();
484+
485+
if (this.getFatal() != null) {
486+
if (!severityColumn.isEqualTo(3.0).isEmpty()) {
487+
this.getFatal().decreaseRuleSensitivity(checkResultsSingleCheck);
488+
}
489+
}
490+
491+
if (this.getError() != null) {
492+
if (!severityColumn.isEqualTo(2.0).isEmpty()) {
493+
this.getError().decreaseRuleSensitivity(checkResultsSingleCheck);
494+
}
495+
}
496+
497+
if (this.getWarning() != null) {
498+
if (!severityColumn.isEqualTo(1.0).isEmpty()) {
499+
this.getWarning().decreaseRuleSensitivity(checkResultsSingleCheck);
500+
}
501+
}
502+
}
459503
}

dqops/src/main/java/com/dqops/data/checkresults/factory/CheckResultsColumnNames.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ public class CheckResultsColumnNames extends SensorReadoutsColumnNames {
102102
* List of column names that should be loaded from the parquet files when the recent result overview is needed.
103103
* We only want to read the statuses.
104104
*/
105-
public static final String[] COLUMN_NAMES_FOR_RESULTS_OVERVIEW = new String[] {
105+
@Deprecated
106+
public static final String[] COLUMN_NAMES_FOR_RESULTS_OVERVIEW_OBSOLETE = new String[] {
106107
COLUMN_NAME_COLUMN_NAME,
107108
CHECK_TYPE_COLUMN_NAME,
108109
TIME_GRADIENT_COLUMN_NAME,
@@ -162,4 +163,10 @@ public class CheckResultsColumnNames extends SensorReadoutsColumnNames {
162163

163164
INCIDENT_HASH_COLUMN_NAME
164165
};
166+
167+
/**
168+
* List of column names that should be loaded from the parquet files when the recent result overview is needed.
169+
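* This constant is an alias of CHECK_RESULTS_COLUMN_NAMES_FOR_READ_ONLY_ACCESS (the same array instance); the previous, narrower list is kept as COLUMN_NAMES_FOR_RESULTS_OVERVIEW_OBSOLETE.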
170+
*/
171+
public static final String[] COLUMN_NAMES_FOR_RESULTS_OVERVIEW = CHECK_RESULTS_COLUMN_NAMES_FOR_READ_ONLY_ACCESS;
165172
}

dqops/src/main/java/com/dqops/data/incidents/models/IncidentModel.java

+23-1
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,19 @@
1616

1717
package com.dqops.data.incidents.models;
1818

19+
import com.dqops.checks.CheckType;
1920
import com.dqops.data.incidents.factory.IncidentStatus;
2021
import com.dqops.data.incidents.factory.IncidentsColumnNames;
22+
import com.dqops.metadata.search.CheckSearchFilters;
2123
import com.dqops.metadata.search.StringPatternComparer;
24+
import com.dqops.metadata.sources.PhysicalTableName;
2225
import com.fasterxml.jackson.annotation.JsonInclude;
2326
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
2427
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
2528
import com.fasterxml.jackson.databind.annotation.JsonNaming;
29+
import com.google.common.base.Strings;
2630
import lombok.Data;
2731
import org.apache.commons.lang3.StringUtils;
28-
import org.apache.parquet.Strings;
2932
import tech.tablesaw.api.Row;
3033

3134
import java.time.Instant;
@@ -281,4 +284,23 @@ public static Comparator<IncidentModel> makeSortComparator(IncidentSortOrder sor
281284
throw new NoSuchElementException("Unsupported sort order on: " + sortOrder);
282285
}
283286
}
287+
288+
/**
289+
* Creates a check search filter that will find all data quality checks that are covered by this incident.
290+
* @return Check search filter that matches all checks that are related to this incident.
291+
*/
292+
public CheckSearchFilters toCheckSearchFilter() {
293+
CheckSearchFilters checkSearchFilters = new CheckSearchFilters();
294+
checkSearchFilters.setConnection(this.getConnection());
295+
checkSearchFilters.setPhysicalTableName(new PhysicalTableName(this.schema, this.table));
296+
checkSearchFilters.setQualityDimension(this.getQualityDimension());
297+
checkSearchFilters.setCheckCategory(this.getCheckCategory());
298+
checkSearchFilters.setCheckName(this.getCheckName());
299+
300+
if (!Strings.isNullOrEmpty(this.checkType)) {
301+
checkSearchFilters.setCheckType(CheckType.valueOf(this.checkType));
302+
}
303+
304+
return checkSearchFilters;
305+
}
284306
}

dqops/src/main/java/com/dqops/data/readouts/normalization/SensorReadoutsNormalizedResult.java

+85
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,15 @@
1717

1818
import com.dqops.data.checkresults.factory.CheckResultsColumnNames;
1919
import com.dqops.data.readouts.factory.SensorReadoutsColumnNames;
20+
import com.dqops.metadata.search.CheckSearchFilters;
2021
import com.dqops.metadata.timeseries.TimePeriodGradient;
2122
import com.dqops.utils.tables.TableColumnUtility;
23+
import com.google.common.base.Strings;
2224
import tech.tablesaw.api.*;
2325
import tech.tablesaw.columns.Column;
26+
import tech.tablesaw.selection.Selection;
27+
28+
import java.time.Instant;
2429

2530
/**
2631
* Describes the dataset (dataframe) returned from the sensor. Identifies the time series column, data stream columns, etc.
@@ -519,4 +524,84 @@ public TextColumn getUpdatedByColumn() {
519524
public IntColumn getSeverityColumn() {
520525
return severityColumn;
521526
}
527+
528+
/**
529+
* Finds sensor result rows matching check search filters.
530+
* @param searchFilters Check search filters.
531+
* @return Selection (bitmap filter) of rows with results for that check.
532+
*/
533+
public Selection findResults(CheckSearchFilters searchFilters) {
534+
Table table = this.getTable();
535+
Selection selection = Selection.withRange(0, table.rowCount());
536+
537+
if (!Strings.isNullOrEmpty(searchFilters.getConnection())) {
538+
selection = selection.and(this.getConnectionNameColumn().isEqualTo(searchFilters.getConnection()));
539+
}
540+
541+
if (searchFilters.getPhysicalTableName() != null) {
542+
selection = selection.and(this.getSchemaNameColumn().isEqualTo(searchFilters.getPhysicalTableName().getSchemaName()));
543+
selection = selection.and(this.getTableNameColumn().isEqualTo(searchFilters.getPhysicalTableName().getTableName()));
544+
}
545+
546+
if (!Strings.isNullOrEmpty(searchFilters.getColumn())) {
547+
selection = selection.and(this.getColumnNameColumn().isEqualTo(searchFilters.getColumn()));
548+
}
549+
550+
if (searchFilters.getCheckType() != null) {
551+
selection = selection.and(this.getCheckTypeColumn().isEqualTo(searchFilters.getCheckType().getDisplayName()));
552+
}
553+
554+
if (searchFilters.getTimeScale() != null) {
555+
selection = selection.and(this.getTimeGradientColumn().isEqualTo(searchFilters.getTimeScale().toTimeSeriesGradient().toString()));
556+
}
557+
558+
if (!Strings.isNullOrEmpty(searchFilters.getCheckCategory())) {
559+
selection = selection.and(this.getCheckCategoryColumn().isEqualTo(searchFilters.getCheckCategory()));
560+
}
561+
562+
if (!Strings.isNullOrEmpty(searchFilters.getTableComparisonName())) {
563+
selection = selection.and(this.getTableComparisonNameColumn().isEqualTo(searchFilters.getTableComparisonName()));
564+
}
565+
566+
if (!Strings.isNullOrEmpty(searchFilters.getCheckName())) {
567+
selection = selection.and(this.getCheckNameColumn().isEqualTo(searchFilters.getCheckName()));
568+
}
569+
570+
if (!Strings.isNullOrEmpty(searchFilters.getQualityDimension())) {
571+
selection = selection.and(this.getQualityDimensionColumn().isEqualTo(searchFilters.getQualityDimension()));
572+
}
573+
574+
if (!Strings.isNullOrEmpty(searchFilters.getSensorName())) {
575+
selection = selection.and(this.getSensorNameColumn().isEqualTo(searchFilters.getSensorName()));
576+
}
577+
578+
return selection;
579+
}
580+
581+
/**
582+
* Returns true if the table with results is empty.
583+
* @return True when empty, false when there are some rows.
584+
*/
585+
public boolean isEmpty() {
586+
return this.getTable().isEmpty();
587+
}
588+
589+
/**
590+
* Finds the row index of the row that contains the most recent result.
591+
* @return The row index that contains the most recent result or null, when no results are present.
592+
*/
593+
public Integer getRowIndexWithMostRecentResult() {
594+
if (this.isEmpty()) {
595+
return null;
596+
}
597+
598+
Instant mostRecentExecutedAt = this.getExecutedAtColumn().max();
599+
Selection selectionOfMostRecentResults = this.getExecutedAtColumn().isEqualTo(mostRecentExecutedAt);
600+
if (selectionOfMostRecentResults.isEmpty()) {
601+
return null; // rather not possible
602+
}
603+
604+
int firstIndex = selectionOfMostRecentResults.get(selectionOfMostRecentResults.size() - 1);
605+
return firstIndex;
606+
}
522607
}

dqops/src/main/java/com/dqops/metadata/id/HierarchyNode.java

+34-2
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,46 @@
2020
import com.dqops.utils.serialization.YamlNotRenderWhenDefault;
2121
import org.apache.commons.collections.IteratorUtils;
2222

23-
import java.util.ArrayList;
24-
import java.util.Collections;
2523
import java.util.List;
24+
import java.util.function.Predicate;
2625

2726
/**
2827
* Interface implemented by objects that are represented on the hierarchy ID tree.
2928
*/
3029
public interface HierarchyNode extends DirtyStatus, ReadOnlyStatus {
30+
/**
31+
* Find an element in an array that is of a given data type or a derived type.
32+
* @param nodes Array of hierarchy nodes to search inside.
33+
* @param targetType Target type to find or a base type.
34+
* @return Found node or null.
35+
* @param <T> Target type.
36+
*/
37+
static <T extends HierarchyNode> T findNodeOfType(HierarchyNode[] nodes, Class<T> targetType) {
38+
for (HierarchyNode node : nodes) {
39+
if (targetType.isAssignableFrom(node.getClass())) {
40+
return (T)node;
41+
}
42+
}
43+
44+
return null;
45+
}
46+
47+
/**
48+
* Find an element in an array that matches a predicate.
49+
* @param nodes Array of hierarchy nodes to search inside.
50+
* @param filter Filter predicate
51+
* @return Found node or null.
52+
*/
53+
static HierarchyNode findNode(HierarchyNode[] nodes, Predicate<HierarchyNode> filter) {
54+
for (HierarchyNode node : nodes) {
55+
if (filter.test(node)) {
56+
return node;
57+
}
58+
}
59+
60+
return null;
61+
}
62+
3163
/**
3264
* Detach all child nodes that are default (empty) and will not be rendered into YAML anyway.
3365
* The purpose of this method is to get rid of extra nodes that were created for a short time to avoid a serialization/deserialization approach for dropping empty nodes.

dqops/src/main/java/com/dqops/metadata/search/CheckSearchFilters.java

+43-7
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@
1515
*/
1616
package com.dqops.metadata.search;
1717

18-
import com.dqops.checks.CheckTarget;
19-
import com.dqops.checks.CheckTimeScale;
20-
import com.dqops.checks.CheckType;
18+
import com.dqops.checks.*;
19+
import com.dqops.checks.comparison.AbstractComparisonCheckCategorySpec;
2120
import com.dqops.metadata.id.HierarchyId;
2221
import com.dqops.metadata.id.HierarchyIdModel;
22+
import com.dqops.metadata.id.HierarchyNode;
2323
import com.dqops.metadata.search.pattern.SearchPattern;
2424
import com.dqops.metadata.sources.ColumnTypeSnapshotSpec;
25+
import com.dqops.metadata.sources.ConnectionSpec;
26+
import com.dqops.metadata.sources.PhysicalTableName;
2527
import com.dqops.utils.docs.generators.SampleStringsRegistry;
2628
import com.dqops.utils.docs.generators.SampleValueFactory;
2729
import com.fasterxml.jackson.annotation.JsonIgnore;
@@ -31,12 +33,10 @@
3133
import com.fasterxml.jackson.databind.annotation.JsonNaming;
3234
import io.swagger.annotations.ApiModel;
3335
import lombok.EqualsAndHashCode;
36+
import org.apache.commons.lang3.ArrayUtils;
3437
import org.apache.parquet.Strings;
3538

36-
import java.util.HashSet;
37-
import java.util.LinkedHashSet;
38-
import java.util.List;
39-
import java.util.Set;
39+
import java.util.*;
4040
import java.util.stream.Collectors;
4141

4242
/**
@@ -423,6 +423,42 @@ public CheckSearchFilters clone() {
423423
}
424424
}
425425

426+
/**
427+
* Creates a check search filter given an instance of a check inside a given connection.
428+
* @param connectionSpec Connection specification where the check is applied.
429+
* @param checkSpec Check specification instance, must be inside a user home and the connection that was given.
430+
* @return Check search filters with all filters that identify the check.
431+
*/
432+
public static CheckSearchFilters fromCheckSpecInstance(ConnectionSpec connectionSpec, AbstractCheckSpec<?,?,?,?> checkSpec) {
433+
HierarchyId checkHierarchyId = checkSpec.getHierarchyId();
434+
HierarchyNode[] allNodesToCheck = checkHierarchyId.getNodesOnPath(connectionSpec);
435+
AbstractRootChecksContainerSpec rootChecksContainerSpec = HierarchyNode.findNodeOfType(allNodesToCheck, AbstractRootChecksContainerSpec.class);
436+
437+
CheckSearchFilters checkSearchFilters = new CheckSearchFilters();
438+
checkSearchFilters.setConnection(connectionSpec.getConnectionName());
439+
PhysicalTableName physicalTableName = checkHierarchyId.getPhysicalTableName();
440+
checkSearchFilters.setPhysicalTableName(physicalTableName);
441+
checkSearchFilters.setFullTableName(physicalTableName.toTableSearchFilter());
442+
checkSearchFilters.setColumn(checkHierarchyId.getColumnName());
443+
checkSearchFilters.setCheckCategory(checkSearchFilters.getCheckCategory());
444+
checkSearchFilters.setCheckName(checkSpec.getCheckName());
445+
checkSearchFilters.setCheckTarget(rootChecksContainerSpec.getCheckTarget());
446+
checkSearchFilters.setCheckType(rootChecksContainerSpec.getCheckType());
447+
checkSearchFilters.setTimeScale(rootChecksContainerSpec.getCheckTimeScale());
448+
449+
if (checkSpec.isTableComparisonCheck()) {
450+
AbstractComparisonCheckCategorySpec comparisonCheckCategorySpec = HierarchyNode.findNodeOfType(allNodesToCheck, AbstractComparisonCheckCategorySpec.class);
451+
checkSearchFilters.setTableComparisonName(comparisonCheckCategorySpec.getComparisonName());
452+
}
453+
454+
return checkSearchFilters;
455+
}
456+
457+
/**
458+
* Create a check search filter from a table search filter object, copying shared attributes.
459+
* @param tableSearchFilters Source table search filter.
460+
* @return Check search filter.
461+
*/
426462
public static CheckSearchFilters fromTableSearchFilters(TableSearchFilters tableSearchFilters) {
427463
return new CheckSearchFilters() {{
428464
setConnection(tableSearchFilters.getConnection());

0 commit comments

Comments
 (0)