
Commit de11239

Updated demos (#169)
## Changes

Make the results of quality checks in demos more interesting (show multiple errors for a row).

### Tests

- [x] manually tested
- [ ] added unit tests
- [ ] added integration tests
1 parent dc94af3 commit de11239

File tree

1 file changed (+20 −21 lines)


demos/dqx_demo_library.py

+20 −21
```diff
@@ -136,24 +136,24 @@
       - col1
       - col2
 
-- criticality: error
+- criticality: warn
   check:
     function: is_not_null_and_not_empty
     arguments:
       col_name: col3
 
-- criticality: error
+- criticality: warn
   filter: col1 < 3
   check:
     function: is_not_null_and_not_empty
     arguments:
       col_name: col4
 
-- criticality: warn
+- criticality: error
   check:
     function: value_is_in_list
     arguments:
-      col_name: col4
+      col_name: col1
       allowed:
       - 1
       - 2
```
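In the demo these checks live in a triple-quoted YAML string and are parsed with `yaml.safe_load` before use. A minimal sketch of parsing and validating a fragment of the checks above, assuming the `DQEngine.validate_checks` API that the demo itself asserts against in the next hunk (module path per the dqx docs; it may differ by version):

```python
import yaml
from databricks.labs.dqx.engine import DQEngine  # assumed module path

# Two of the checks from the diff above, parsed into the list-of-dicts
# metadata format that DQEngine consumes.
checks = yaml.safe_load("""
- criticality: warn
  check:
    function: is_not_null_and_not_empty
    arguments:
      col_name: col3

- criticality: error
  check:
    function: value_is_in_list
    arguments:
      col_name: col1
      allowed:
      - 1
      - 2
""")

# Validate the definitions before applying them; the demo's own
# "assert not status.has_errors" in the next hunk does exactly this.
status = DQEngine.validate_checks(checks)
assert not status.has_errors
```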
```diff
@@ -164,7 +164,7 @@
 assert not status.has_errors
 
 schema = "col1: int, col2: int, col3: int, col4 int"
-input_df = spark.createDataFrame([[1, 3, 3, 1], [2, None, 4, 1]], schema)
+input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)
 
 dq_engine = DQEngine(WorkspaceClient())
 
```
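With the updated rows, one record can now fail several checks at once, which is what the commit message means by "show multiple errors for a row": `[1, 3, 3, None]` only triggers the warn-level `col4` check (its filter `col1 < 3` matches), while `[3, None, 4, 1]` fails both the error-level `is_not_null` on `col2` and the error-level `value_is_in_list` on `col1`. A hedged sketch of applying the checks and inspecting the outcome, assuming dqx's default `_errors` and `_warnings` reporting columns:

```python
# Assumes a Databricks notebook where `spark` is defined, plus the
# `checks`, `input_df`, and `dq_engine` built above.
checked_df = dq_engine.apply_checks_by_metadata(input_df, checks)

# Row [1, 3, 3, None]: one warning (col4 null while filter col1 < 3 matches).
# Row [3, None, 4, 1]: two errors (col2 null; col1 = 3 not in [1, 2]).
checked_df.select("col1", "col2", "col3", "col4", "_errors", "_warnings").show(truncate=False)
```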
```diff
@@ -194,20 +194,20 @@
          check_func=is_not_null).get_rules() + [
          DQRule( # define rule for a single column
             name="col3_is_null_or_empty",
-            criticality="error",
+            criticality="warn",
             check=is_not_null_and_not_empty("col3")),
          DQRule( # define rule with a filter
             name="col_4_is_null_or_empty",
-            criticality="error",
+            criticality="warn",
             filter="col1 < 3",
             check=is_not_null_and_not_empty("col4")),
          DQRule( # name auto-generated if not provided
-            criticality="warn",
-            check=value_is_in_list("col4", ["1", "2"]))
+            criticality="error",
+            check=value_is_in_list("col1", ["1", "2"]))
         ]
 
 schema = "col1: int, col2: int, col3: int, col4 int"
-input_df = spark.createDataFrame([[1, 3, 3, 1], [2, None, 4, 1]], schema)
+input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)
 
 dq_engine = DQEngine(WorkspaceClient())
 
```
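For completeness, a sketch of the imports this programmatic variant needs and of the split-style application, assuming the `databricks.labs.dqx` module layout used in this demo (exact paths and names may vary by version):

```python
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.engine import DQEngine            # assumed module path
from databricks.labs.dqx.rule import DQRule, DQRuleColSet  # assumed module path
from databricks.labs.dqx.col_functions import (            # assumed module path
    is_not_null,
    is_not_null_and_not_empty,
    value_is_in_list,
)

dq_engine = DQEngine(WorkspaceClient())

# apply_checks keeps all rows in one DataFrame with reporting columns
# appended; apply_checks_and_split instead returns the passing rows and
# the flagged rows as two separate DataFrames.
valid_df, quarantined_df = dq_engine.apply_checks_and_split(input_df, checks)
```

Splitting is handy when quarantined rows should be routed to a separate table rather than carried along with reporting columns.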
```diff
@@ -336,7 +336,7 @@ def ends_with_foo(col_name: str) -> Column:
     function: is_not_null_and_not_empty
     arguments:
       col_name: col1
-- criticality: error
+- criticality: warn
   check:
     function: ends_with_foo
     arguments:
```
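The hunk header shows this YAML sits next to a user-defined `ends_with_foo(col_name: str) -> Column` function that the `function: ends_with_foo` entry resolves to. A plausible shape for such a custom check, assuming dqx's `make_condition` helper; this is a sketch, not necessarily the demo's exact implementation:

```python
import pyspark.sql.functions as F
from pyspark.sql import Column
from databricks.labs.dqx.col_functions import make_condition  # assumed module path

def ends_with_foo(col_name: str) -> Column:
    # make_condition(condition, message, alias): rows matching the condition
    # are flagged, the message lands in the reporting column, and the alias
    # names the check in the results.
    column = F.col(col_name)
    return make_condition(
        column.endswith("foo"),
        f"Column {col_name} ends with foo",
        f"{col_name}_ends_with_foo",
    )
```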
```diff
@@ -350,8 +350,8 @@ def ends_with_foo(col_name: str) -> Column:
 """
 )
 
-schema = "col1: string"
-input_df = spark.createDataFrame([["str1"], ["foo"], ["str3"]], schema)
+schema = "col1: string, col2: string"
+input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)
 
 dq_engine = DQEngine(WorkspaceClient())
 
```
```diff
@@ -380,14 +380,13 @@ def ends_with_foo(col_name: str) -> Column:
 ws = WorkspaceClient()
 dq_engine = DQEngine(ws, extra_params=extra_parameters)
 
-schema = "col1: string"
-input_df = spark.createDataFrame([["str1"], ["foo"], ["str3"]], schema)
+schema = "col1: string, col2: string"
+input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)
 
-checks = [ DQRule(
-        name="col_1_is_null_or_empty",
-        criticality="error",
-        check=is_not_null_and_not_empty("col1")),
-    ]
+checks = [
+    DQRule(criticality="error", check=is_not_null_and_not_empty("col1")),
+    DQRule(criticality="warn", check=is_not_null_and_not_empty("col2")),
+]
 
 valid_and_quarantined_df = dq_engine.apply_checks(input_df, checks)
-display(valid_and_quarantined_df)
+display(valid_and_quarantined_df)
```
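With the new input, each of the three rows is flagged differently: `[None, "foo"]` fails only the error-level `col1` check, `["foo", None]` only the warn-level `col2` check, and `[None, None]` collects both, again giving multiple findings per row. A sketch of splitting the combined output afterwards, assuming the engine's `get_valid`/`get_invalid` helpers exist as in the dqx docs (the `extra_params` above may rename the reporting columns):

```python
# apply_checks returns a single DataFrame holding good and bad rows,
# annotated with error/warning reporting columns.
valid_and_quarantined_df = dq_engine.apply_checks(input_df, checks)

# Assumed helpers that filter on the reporting columns:
good_df = dq_engine.get_valid(valid_and_quarantined_df)    # no errors recorded
bad_df = dq_engine.get_invalid(valid_and_quarantined_df)   # errors or warnings recorded

display(good_df)
display(bad_df)
```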
