Skip to content

Commit e0304a7

Browse files
committed
The current data quality status for a table calculates also results for each DQ dimension.
1 parent 7543a92 commit e0304a7

File tree

6 files changed

+453
-1
lines changed

6 files changed

+453
-1
lines changed

dqops/src/main/java/com/dqops/data/checkresults/models/currentstatus/ColumnCurrentDataQualityStatusModel.java

+32
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ public class ColumnCurrentDataQualityStatusModel implements CurrentDataQualitySt
116116
"that describe the most current status.")
117117
private Map<String, CheckCurrentDataQualityStatusModel> checks = new LinkedHashMap<>();
118118

119+
/**
120+
* The data quality status for each data quality dimension. The status includes the status of column-level checks for the same dimension, such as Completeness.
121+
*/
122+
@JsonPropertyDescription("Dictionary of the current data quality statues for each data quality dimension.")
123+
private Map<String, DimensionCurrentDataQualityStatusModel> dimensions = new LinkedHashMap<>();
124+
125+
119126
/**
120127
* Calculates the highest current severity status and historic severity status from all checks.
121128
*/
@@ -139,6 +146,31 @@ public void calculateHighestCurrentAndHistoricSeverity() {
139146
}
140147
}
141148

149+
/**
150+
* Processes the check results and computes results for each data quality dimension.
151+
*/
152+
public void calculateStatusesForDimensions() {
153+
for (CheckCurrentDataQualityStatusModel checkStatusModel : checks.values()) {
154+
String qualityDimension = checkStatusModel.getQualityDimension();
155+
if (qualityDimension == null) {
156+
continue; // should not happen, but if somebody intentionally configures an empty dimension....
157+
}
158+
159+
DimensionCurrentDataQualityStatusModel dimensionModel = this.dimensions.get(qualityDimension);
160+
if (dimensionModel == null) {
161+
dimensionModel = new DimensionCurrentDataQualityStatusModel();
162+
dimensionModel.setDimension(qualityDimension);
163+
this.dimensions.put(qualityDimension, dimensionModel);
164+
}
165+
166+
dimensionModel.appendCheckResult(checkStatusModel);
167+
}
168+
169+
for (DimensionCurrentDataQualityStatusModel dimensionModel : this.dimensions.values()) {
170+
dimensionModel.calculateDataQualityKpiScore();
171+
}
172+
}
173+
142174

143175
public static class ColumnCurrentDataQualityStatusModelSampleFactory implements SampleValueFactory<ColumnCurrentDataQualityStatusModel> {
144176
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*
2+
* Copyright © 2021 DQOps (support@dqops.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.dqops.data.checkresults.models.currentstatus;
18+
19+
import com.dqops.rules.RuleSeverityLevel;
20+
import com.fasterxml.jackson.annotation.JsonInclude;
21+
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
22+
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
23+
import com.fasterxml.jackson.databind.annotation.JsonNaming;
24+
import io.swagger.annotations.ApiModel;
25+
import lombok.Data;
26+
27+
import java.time.Instant;
28+
import java.util.Objects;
29+
30+
/**
31+
* A model that describes the current data quality status for a single data quality dimension.
32+
*/
33+
@JsonInclude(JsonInclude.Include.NON_NULL)
34+
@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
35+
@ApiModel(value = "DimensionCurrentDataQualityStatusModel", description = "The summary of the current data quality status for one data quality dimension")
36+
@Data
37+
public class DimensionCurrentDataQualityStatusModel {
38+
/**
39+
* Data quality dimension name. The most popular dimensions are: Completeness, Uniqueness, Timeliness, Validity, Consistency, Accuracy, Availability.
40+
*/
41+
@JsonPropertyDescription("Data quality dimension name. The most popular dimensions are: Completeness, Uniqueness, Timeliness, Validity, Consistency, Accuracy, Availability.")
42+
private String dimension;
43+
44+
/**
45+
* The most recent data quality issue severity for this data quality dimension. When the table is monitored using data grouping, it is the highest issue severity of all recently analyzed data groups.
46+
* For partitioned checks, it is the highest severity of all results for all partitions (time periods) in the analyzed time range.
47+
*/
48+
@JsonPropertyDescription("The most recent data quality issue severity for this table. When the table is monitored using data grouping, it is the highest issue severity of all recently analyzed data groups. " +
49+
"For partitioned checks, it is the highest severity of all results for all partitions (time periods) in the analyzed time range.")
50+
private RuleSeverityLevel currentSeverity;
51+
52+
/**
53+
* The highest severity of previous executions of this data quality issue in the analyzed time range.
54+
* It can be different from the *current_severity* if the data quality issue was solved and the most recent data quality check did not detect it anymore.
55+
* For partitioned checks, this field returns the same value as the *current_severity*, because data quality issues in older partitions are still valid.
56+
*/
57+
@JsonPropertyDescription("The highest severity of previous executions of this data quality issue in the analyzed time range. " +
58+
"It can be different from the *current_severity* if the data quality issue was solved and the most recently data quality issue did not detect it anymore. " +
59+
"For partitioned checks, this field returns the same value as the *current_severity*, because data quality issues in older partitions are still valid.")
60+
private RuleSeverityLevel highestHistoricalSeverity;
61+
62+
/**
63+
* The UTC timestamp when the most recent data quality check was executed on the table for one data quality dimension.
64+
*/
65+
@JsonPropertyDescription("The UTC timestamp when the most recent data quality check was executed on the table.")
66+
private Instant lastCheckExecutedAt;
67+
68+
/**
69+
* The total number of most recent checks that were executed on the table for one data quality dimension. Table comparison checks that are comparing groups of data are counted as the number of compared data groups.
70+
*/
71+
@JsonPropertyDescription("The total number of most recent checks that were executed on the table for one data quality dimension. Table comparison checks that are comparing groups of data are counted as the number of compared data groups.")
72+
private int executedChecks;
73+
74+
/**
75+
* The number of most recent valid data quality checks that passed without raising any issues.
76+
*/
77+
@JsonPropertyDescription("The number of most recent valid data quality checks that passed without raising any issues.")
78+
private int validResults;
79+
80+
/**
81+
* The number of most recent data quality checks that failed by raising a warning severity data quality issue.
82+
*/
83+
@JsonPropertyDescription("The number of most recent data quality checks that failed by raising a warning severity data quality issue.")
84+
private int warnings;
85+
86+
/**
87+
* The number of most recent data quality checks that failed by raising an error severity data quality issue.
88+
*/
89+
@JsonPropertyDescription("The number of most recent data quality checks that failed by raising an error severity data quality issue.")
90+
private int errors;
91+
92+
/**
93+
* The number of most recent data quality checks that failed by raising a fatal severity data quality issue.
94+
*/
95+
@JsonPropertyDescription("The number of most recent data quality checks that failed by raising a fatal severity data quality issue.")
96+
private int fatals;
97+
98+
/**
99+
* The number of data quality check execution errors that were reported due to access issues to the data source, invalid mapping in DQOps,
100+
* invalid queries in data quality sensors or invalid python rules.
101+
* When an execution error is reported, the configuration of a data quality check on a table must be updated.
102+
*/
103+
@JsonPropertyDescription("The number of data quality check execution errors that were reported due to access issues to the data source, " +
104+
"invalid mapping in DQOps, invalid queries in data quality sensors or invalid python rules. " +
105+
"When an execution error is reported, the configuration of a data quality check on a table must be updated.")
106+
private int executionErrors;
107+
108+
/**
109+
* Data quality KPI score for the data quality dimension, measured as a percentage of passed data quality checks.
110+
* DQOps counts data quality issues at a warning severity level as passed checks. The data quality KPI score is a value in the range 0..100.
111+
*/
112+
@JsonPropertyDescription("Data quality KPI score for the data quality dimension, measured as a percentage of passed data quality checks. " +
113+
"DQOps counts data quality issues at a warning severity level as passed checks. The data quality KPI score is a value in the range 0..100.")
114+
private Double dataQualityKpi;
115+
116+
/**
117+
* Calculates a data quality KPI score for a data quality dimension.
118+
*/
119+
public void calculateDataQualityKpiScore() {
120+
int totalExecutedChecksWithNoExecutionErrors = this.getValidResults() + this.getWarnings() + this.getErrors() + this.getFatals();
121+
Double dataQualityKpi = totalExecutedChecksWithNoExecutionErrors > 0 ?
122+
(this.getValidResults() + this.getWarnings()) * 100.0 / totalExecutedChecksWithNoExecutionErrors : null;
123+
setDataQualityKpi(dataQualityKpi);
124+
}
125+
126+
/**
127+
* Appends the results from one data quality check.
128+
* @param checkStatusModel Data quality check result model.
129+
*/
130+
public void appendCheckResult(CheckCurrentDataQualityStatusModel checkStatusModel) {
131+
if (this.currentSeverity == null ||
132+
(checkStatusModel.getCurrentSeverity() != null && this.currentSeverity.getSeverity() < checkStatusModel.getCurrentSeverity().getSeverity() &&
133+
checkStatusModel.getCurrentSeverity().getSeverity() != 4)) {
134+
this.currentSeverity = RuleSeverityLevel.fromCheckSeverity(checkStatusModel.getCurrentSeverity());
135+
}
136+
137+
if (this.highestHistoricalSeverity == null ||
138+
(checkStatusModel.getHighestHistoricalSeverity() != null &&
139+
this.highestHistoricalSeverity.getSeverity() < checkStatusModel.getHighestHistoricalSeverity().getSeverity())) {
140+
this.highestHistoricalSeverity = checkStatusModel.getHighestHistoricalSeverity();
141+
}
142+
143+
if (this.lastCheckExecutedAt == null ||
144+
(checkStatusModel.getLastExecutedAt() != null && checkStatusModel.getLastExecutedAt().isAfter(this.lastCheckExecutedAt))) {
145+
this.lastCheckExecutedAt = checkStatusModel.getLastExecutedAt();
146+
}
147+
148+
this.executedChecks++; // we count only the current status (the last executed check), maybe we should count executed checks also for partitioned checks
149+
if (this.currentSeverity != null) {
150+
switch (this.currentSeverity) {
151+
case valid:
152+
this.validResults++;
153+
break;
154+
155+
case warning:
156+
this.warnings++;
157+
break;
158+
159+
case error:
160+
this.errors++;
161+
break;
162+
163+
case fatal:
164+
this.fatals++;
165+
break;
166+
}
167+
} else {
168+
if (checkStatusModel.getCurrentSeverity() != null && checkStatusModel.getCurrentSeverity().getSeverity() == 4) {
169+
this.executionErrors++;
170+
}
171+
}
172+
}
173+
174+
/**
175+
* Appends results from the dimension on another column.
176+
* @param columnDimensionModel The results for the same dimension, but on a different column.
177+
*/
178+
public void appendResults(DimensionCurrentDataQualityStatusModel columnDimensionModel) {
179+
assert Objects.equals(this.dimension, columnDimensionModel.dimension);
180+
181+
if (this.currentSeverity == null ||
182+
(columnDimensionModel.getCurrentSeverity() != null && this.currentSeverity.getSeverity() < columnDimensionModel.getCurrentSeverity().getSeverity())) {
183+
this.currentSeverity = columnDimensionModel.getCurrentSeverity();
184+
}
185+
186+
if (this.highestHistoricalSeverity == null ||
187+
(columnDimensionModel.getHighestHistoricalSeverity() != null &&
188+
this.highestHistoricalSeverity.getSeverity() < columnDimensionModel.getHighestHistoricalSeverity().getSeverity())) {
189+
this.highestHistoricalSeverity = columnDimensionModel.getHighestHistoricalSeverity();
190+
}
191+
192+
if (this.lastCheckExecutedAt == null ||
193+
(columnDimensionModel.getLastCheckExecutedAt() != null && columnDimensionModel.getLastCheckExecutedAt().isAfter(this.lastCheckExecutedAt))) {
194+
this.lastCheckExecutedAt = columnDimensionModel.getLastCheckExecutedAt();
195+
}
196+
197+
this.executedChecks += columnDimensionModel.executedChecks;
198+
this.validResults += columnDimensionModel.validResults;
199+
this.warnings += columnDimensionModel.warnings;
200+
this.errors += columnDimensionModel.errors;
201+
this.fatals += columnDimensionModel.fatals;
202+
this.executionErrors += columnDimensionModel.executionErrors;
203+
}
204+
}

dqops/src/main/java/com/dqops/data/checkresults/models/currentstatus/TableCurrentDataQualityStatusModel.java

+48-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
"The values of severity levels are: 0 - all data quality checks passed, 1 - a warning was detected, 2 - an error was detected, " +
4747
"3 - a fatal data quality issue was detected.")
4848
@Data
49-
public class TableCurrentDataQualityStatusModel implements CurrentDataQualityStatusHolder {
49+
public class TableCurrentDataQualityStatusModel implements CurrentDataQualityStatusHolder, Cloneable {
5050
/**
5151
* The connection name in DQOps.
5252
*/
@@ -151,6 +151,12 @@ public class TableCurrentDataQualityStatusModel implements CurrentDataQualitySta
151151
@JsonPropertyDescription("Dictionary of data statues for all columns that have any known data quality results. The keys in the dictionary are the column names.")
152152
private Map<String, ColumnCurrentDataQualityStatusModel> columns = new LinkedHashMap<>();
153153

154+
/**
155+
* The data quality status for each data quality dimension. The status includes the status of table-level checks and column-level checks for all columns that are reported for the same dimension, such as Completeness.
156+
*/
157+
@JsonPropertyDescription("Dictionary of the current data quality statues for each data quality dimension.")
158+
private Map<String, DimensionCurrentDataQualityStatusModel> dimensions = new LinkedHashMap<>();
159+
154160
/**
155161
* Analyzes all table level checks and column level checks to calculate the highest severity level at a table level.
156162
*/
@@ -204,6 +210,47 @@ public void calculateDataQualityKpiScore() {
204210
setDataQualityKpi(dataQualityKpi);
205211
}
206212

213+
/**
214+
* Calculates the status for each data quality dimension, aggregates statuses of data quality checks for each data quality dimension.
215+
*/
216+
public void calculateStatusesForDataQualityDimensions() {
217+
for (ColumnCurrentDataQualityStatusModel columnModel : columns.values()) {
218+
columnModel.calculateStatusesForDimensions();
219+
220+
for (DimensionCurrentDataQualityStatusModel columnDimensionModel : columnModel.getDimensions().values()) {
221+
String dimensionName = columnDimensionModel.getDimension();
222+
DimensionCurrentDataQualityStatusModel tableDimensionModel = this.dimensions.get(dimensionName);
223+
if (tableDimensionModel == null) {
224+
tableDimensionModel = new DimensionCurrentDataQualityStatusModel();
225+
tableDimensionModel.setDimension(dimensionName);
226+
this.dimensions.put(dimensionName, tableDimensionModel);
227+
}
228+
229+
tableDimensionModel.appendResults(columnDimensionModel);
230+
}
231+
}
232+
233+
for (CheckCurrentDataQualityStatusModel checkStatusModel : checks.values()) {
234+
String qualityDimension = checkStatusModel.getQualityDimension();
235+
if (qualityDimension == null) {
236+
continue; // should not happen, but if somebody intentionally configures an empty dimension....
237+
}
238+
239+
DimensionCurrentDataQualityStatusModel dimensionModel = this.dimensions.get(qualityDimension);
240+
if (dimensionModel == null) {
241+
dimensionModel = new DimensionCurrentDataQualityStatusModel();
242+
dimensionModel.setDimension(qualityDimension);
243+
this.dimensions.put(qualityDimension, dimensionModel);
244+
}
245+
246+
dimensionModel.appendCheckResult(checkStatusModel);
247+
}
248+
249+
for (DimensionCurrentDataQualityStatusModel dimensionModel : this.dimensions.values()) {
250+
dimensionModel.calculateDataQualityKpiScore();
251+
}
252+
}
253+
207254
public static class TableCurrentDataQualityStatusModelSampleFactory implements SampleValueFactory<TableCurrentDataQualityStatusModel> {
208255
@Override
209256
public TableCurrentDataQualityStatusModel createSample() {

dqops/src/main/java/com/dqops/data/checkresults/services/CheckResultsDataServiceImpl.java

+1
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,7 @@ protected TableCurrentDataQualityStatusModel calculateStatus(Table sourceTable,
853853

854854
tableStatusModel.calculateHighestCurrentAndHistoricSeverity();
855855
tableStatusModel.calculateDataQualityKpiScore();
856+
tableStatusModel.calculateStatusesForDataQualityDimensions();
856857

857858
return tableStatusModel;
858859
}

0 commit comments

Comments
 (0)