@@ -55,7 +55,7 @@ dlt_expectations = dlt_generator.generate_dlt_rules(profiles, language="Python_D
print(dlt_expectations)
```
- ### Using CLI
+ ### Using CLI
You can optionally install DQX in the workspace; see the [Installation Guide](/docs/installation#dqx-installation-in-a-databricks-workspace).
As part of the installation, a configuration file, dashboards, and a profiler workflow are installed. The workflow can be run manually in the workspace UI or using the CLI as shown below.
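
A hedged sketch of what triggering the profiler workflow from the Databricks CLI could look like (the `profile` subcommand and `--run-config` flag are assumptions here; check the installed CLI's help for the exact invocation):

```
# assumed invocation; verify the subcommand and flag names against your installed version
databricks labs dqx profile --run-config "default"
```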
@@ -116,7 +116,7 @@ print(status)
```
Note that checks are validated automatically when applied as part of the
- `apply_checks_by_metadata_and_split` and `apply_checks_by_metadata` methods
+ `apply_checks_by_metadata_and_split` and `apply_checks_by_metadata` methods
(see [Quality rules defined as config](#quality-rules-defined-as-config)).
### Using CLI
@@ -178,7 +178,7 @@ checks = dq_engine.load_checks_from_installation(assume_user=True, run_config_na
input_df = spark.read.table("catalog1.schema1.table1")
- # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
+ # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
valid_df, quarantined_df = dq_engine.apply_checks_by_metadata_and_split(input_df, checks)
# Option 2: apply quality rules on the dataframe and report issues as additional columns (`_warning` and `_error`)
@@ -198,7 +198,7 @@ checks = dq_engine.load_checks_from_workspace_file(workspace_path="/Shared/App1/
input_df = spark.read.table("catalog1.schema1.table1")
- # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
+ # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
valid_df, quarantined_df = dq_engine.apply_checks_by_metadata_and_split(input_df, checks)
# Option 2: apply quality rules on the dataframe and report issues as additional columns (`_warning` and `_error`)
@@ -220,7 +220,7 @@ dq_engine = DQEngine(WorkspaceClient())
input_df = spark.read.table("catalog1.schema1.table1")
- # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
+ # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
valid_df, quarantined_df = dq_engine.apply_checks_by_metadata_and_split(input_df, checks)
# Option 2: apply quality rules on the dataframe and report issues as additional columns (`_warning` and `_error`)
@@ -241,21 +241,26 @@ from databricks.sdk import WorkspaceClient
dq_engine = DQEngine(WorkspaceClient())
checks = DQRuleColSet( # define rule for multiple columns at once
- columns = ["col1", "col2"],
- criticality = "error",
+ columns = ["col1", "col2"],
+ criticality = "error",
check_func = is_not_null).get_rules() + [
DQRule( # define rule for a single column
- name = 'col3_is_null_or_empty',
- criticality = 'error',
- check = is_not_null_and_not_empty('col3')),
+ name = "col3_is_null_or_empty",
+ criticality = "error",
+ check = is_not_null_and_not_empty("col3")),
+ DQRule( # define rule with a filter
+ name = "col_4_is_null_or_empty",
+ criticality = "error",
+ filter = "col1 < 3",
+ check = is_not_null_and_not_empty("col4")),
DQRule( # name auto-generated if not provided
- criticality = 'warn',
- check = value_is_in_list('col4', ['1', '2']))
+ criticality = "warn",
+ check = value_is_in_list("col4", ["1", "2"]))
]
input_df = spark.read.table("catalog1.schema1.table1")
- # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
+ # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
valid_df, quarantined_df = dq_engine.apply_checks_and_split(input_df, checks)
# Option 2: apply quality rules on the dataframe and report issues as additional columns (`_warning` and `_error`)
@@ -288,6 +293,13 @@ checks = yaml.safe_load("""
arguments:
col_name: col3

+ - criticality: error
+ filter: col1 < 3
+ check:
+ function: is_not_null_and_not_empty
+ arguments:
+ col_name: col4
+
- criticality: warn
check:
function: value_is_in_list
@@ -300,7 +312,7 @@ checks = yaml.safe_load("""
input_df = spark.read.table("catalog1.schema1.table1")
- # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
+ # Option 1: apply quality rules on the dataframe and provide valid and invalid (quarantined) dataframes
valid_df, quarantined_df = dq_engine.apply_checks_by_metadata_and_split(input_df, checks)
# Option 2: apply quality rules on the dataframe and report issues as additional columns (`_warning` and `_error`)
@@ -411,3 +423,27 @@ dq_engine = DQEngine(ws)
For details on the specific methods available in the engine, refer to the [reference](/docs/reference#dq-engine-methods) section.
Information on testing applications that use `DQEngine` can be found [here](/docs/reference#testing-applications-using-dqx).
+
+ ## Additional Configuration
+
+ ### Customizing Reporting Error and Warning Columns
+
+ By default, DQX appends `_error` and `_warning` reporting columns to the output DataFrame to flag quality issues.
+
+ You can customize the names of these reporting columns by passing extra parameters when creating the engine.
+
+ ```python
+ from databricks.sdk import WorkspaceClient
+ from databricks.labs.dqx.engine import (
+ DQEngine,
+ ExtraParams,
+ )
+
+ # customize reporting column names
+ extra_parameters = ExtraParams(column_names = {"errors": "dq_errors", "warnings": "dq_warnings"})
+
+ ws = WorkspaceClient()
+ dq_engine = DQEngine(ws, extra_params = extra_parameters)
+ ```
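+
+ With this engine instance, the renamed columns are used in place of the defaults. A minimal sketch of applying checks with it, assuming `checks` and `input_df` are defined as in the earlier examples:
+
+ ```python
+ # issues are reported in `dq_errors` and `dq_warnings` instead of `_error` and `_warning`
+ checked_df = dq_engine.apply_checks_by_metadata(input_df, checks)
+ ```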
+
+