       - col1
       - col2
 
-- criticality: error
+- criticality: warn
   check:
     function: is_not_null_and_not_empty
     arguments:
       col_name: col3
 
-- criticality: error
+- criticality: warn
   filter: col1 < 3
   check:
     function: is_not_null_and_not_empty
     arguments:
       col_name: col4
 
-- criticality: warn
+- criticality: error
   check:
     function: value_is_in_list
     arguments:
-      col_name: col4
+      col_name: col1
       allowed:
       - 1
       - 2
@@ -164,7 +164,7 @@
 assert not status.has_errors
 
 schema = "col1: int, col2: int, col3: int, col4 int"
-input_df = spark.createDataFrame([[1, 3, 3, 1], [2, None, 4, 1]], schema)
+input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)
 
 dq_engine = DQEngine(WorkspaceClient())
 
@@ -194,20 +194,20 @@
     check_func=is_not_null).get_rules() + [
     DQRule( # define rule for a single column
         name="col3_is_null_or_empty",
-        criticality="error",
+        criticality="warn",
         check=is_not_null_and_not_empty("col3")),
     DQRule( # define rule with a filter
         name="col_4_is_null_or_empty",
-        criticality="error",
+        criticality="warn",
         filter="col1 < 3",
         check=is_not_null_and_not_empty("col4")),
     DQRule( # name auto-generated if not provided
-        criticality="warn",
-        check=value_is_in_list("col4", ["1", "2"]))
+        criticality="error",
+        check=value_is_in_list("col1", ["1", "2"]))
 ]
 
 schema = "col1: int, col2: int, col3: int, col4 int"
-input_df = spark.createDataFrame([[1, 3, 3, 1], [2, None, 4, 1]], schema)
+input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)
 
 dq_engine = DQEngine(WorkspaceClient())
 
@@ -336,7 +336,7 @@ def ends_with_foo(col_name: str) -> Column:
     function: is_not_null_and_not_empty
     arguments:
       col_name: col1
-- criticality: error
+- criticality: warn
   check:
     function: ends_with_foo
     arguments:
@@ -350,8 +350,8 @@ def ends_with_foo(col_name: str) -> Column:
 """
 )
 
-schema = "col1: string"
-input_df = spark.createDataFrame([["str1"], ["foo"], ["str3"]], schema)
+schema = "col1: string, col2: string"
+input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)
 
 dq_engine = DQEngine(WorkspaceClient())
 
@@ -380,14 +380,13 @@ def ends_with_foo(col_name: str) -> Column:
 ws = WorkspaceClient()
 dq_engine = DQEngine(ws, extra_params=extra_parameters)
 
-schema = "col1: string"
-input_df = spark.createDataFrame([["str1"], ["foo"], ["str3"]], schema)
+schema = "col1: string, col2: string"
+input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)
 
-checks = [ DQRule(
-        name="col_1_is_null_or_empty",
-        criticality="error",
-        check=is_not_null_and_not_empty("col1")),
-    ]
+checks = [
+    DQRule(criticality="error", check=is_not_null_and_not_empty("col1")),
+    DQRule(criticality="warn", check=is_not_null_and_not_empty("col2")),
+]
 
 valid_and_quarantined_df = dq_engine.apply_checks(input_df, checks)
-display(valid_and_quarantined_df)
+display(valid_and_quarantined_df)
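For reference, the hunks above only show changed fragments of the demo. A minimal, self-contained sketch of the final DQRule example follows; it assumes a Databricks notebook context (where spark and display are predefined) and dqx import paths that can differ between versions, so treat the imports as placeholders rather than the demo's exact header.

# NOTE: import paths are an assumption based on the dqx project layout;
# adjust them to the dqx version you are using.
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.engine import DQEngine, DQRule
from databricks.labs.dqx.col_functions import is_not_null_and_not_empty

# Two string columns; every row violates at least one of the checks below.
schema = "col1: string, col2: string"
input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)

# col1 violations are errors, col2 violations only produce warnings.
checks = [
    DQRule(criticality="error", check=is_not_null_and_not_empty("col1")),
    DQRule(criticality="warn", check=is_not_null_and_not_empty("col2")),
]

dq_engine = DQEngine(WorkspaceClient())

# apply_checks returns the input rows annotated with the check results.
valid_and_quarantined_df = dq_engine.apply_checks(input_df, checks)
display(valid_and_quarantined_df)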