Skip to content

Commit 5d6479c

Browse files
committed
fixed fmt
1 parent e4417a9 commit 5d6479c

File tree

4 files changed

+158
-143
lines changed

4 files changed

+158
-143
lines changed
+14-14
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,25 @@
11
import datetime
22

33

4-
def val_to_str(v, include_sql_quotes=True):
4+
def val_to_str(value, include_sql_quotes=True):
55
quote = "'" if include_sql_quotes else ""
6-
if isinstance(v, datetime.datetime):
7-
return f"{quote}{v.strftime('%Y-%m-%dT%H:%M:%S.%f%z')}{quote}"
8-
if isinstance(v, datetime.date):
9-
return f"{quote}{v.isoformat()}{quote}"
6+
if isinstance(value, datetime.datetime):
7+
return f"{quote}{value.strftime('%Y-%m-%dT%H:%M:%S.%f%z')}{quote}"
8+
if isinstance(value, datetime.date):
9+
return f"{quote}{value.isoformat()}{quote}"
1010

11-
if isinstance(v, int) or isinstance(v, float):
12-
return str(v)
11+
if isinstance(value, (int, float)):
12+
return str(value)
1313

1414
# TODO: do correct escaping
15-
return f"{quote}{v}{quote}"
15+
return f"{quote}{value}{quote}"
1616

1717

18-
def val_maybe_to_str(v, include_sql_quotes=True):
18+
def val_maybe_to_str(value, include_sql_quotes=True):
1919
quote = "'" if include_sql_quotes else ""
20-
if isinstance(v, datetime.datetime):
21-
return f"{quote}{v.strftime('%Y-%m-%dT%H:%M:%S.%f%z')}{quote}"
22-
if isinstance(v, datetime.date):
23-
return f"{quote}{v.isoformat()}{quote}"
20+
if isinstance(value, datetime.datetime):
21+
return f"{quote}{value.strftime('%Y-%m-%dT%H:%M:%S.%f%z')}{quote}"
22+
if isinstance(value, datetime.date):
23+
return f"{quote}{value.isoformat()}{quote}"
2424

25-
return v
25+
return value
Original file line number | Diff line number | Diff line change
@@ -1,46 +1,47 @@
11
import json
22
import re
3-
from typing import Union
43

54
from databricks.labs.dqx.profiler.common import val_to_str
65
from databricks.labs.dqx.profiler.profiler import DQRule
76

87
__name_sanitize_re__ = re.compile(r"[^a-zA-Z0-9]+")
98

109

11-
def dlt_generate_is_in(cl, **params: dict):
10+
def dlt_generate_is_in(col_name, **params: dict):
1211
in_str = ", ".join([val_to_str(v) for v in params["in"]])
13-
return f"{cl} in ({in_str})"
12+
return f"{col_name} in ({in_str})"
1413

1514

16-
def dlt_generate_min_max(cl, **params: dict):
17-
mn = params.get("min")
18-
mx = params.get("max")
19-
if mn is not None and mx is not None:
15+
def dlt_generate_min_max(col_name, **params: dict):
16+
min_limit = params.get("min")
17+
max_limit = params.get("max")
18+
if min_limit is not None and max_limit is not None:
2019
# We can generate `col between(min, max)`, but this one is easier to modify if you need to remove some of the bounds
21-
return f"{cl} >= {val_to_str(mn)} and {cl} <= {val_to_str(mx)}"
22-
elif mx is not None:
23-
return f"{cl} <= {val_to_str(mx)}"
24-
elif mn is not None:
25-
return f"{cl} >= {val_to_str(mn)}"
20+
return f"{col_name} >= {val_to_str(min_limit)} and {col_name} <= {val_to_str(max_limit)}"
21+
22+
if max_limit is not None:
23+
return f"{col_name} <= {val_to_str(max_limit)}"
24+
25+
if min_limit is not None:
26+
return f"{col_name} >= {val_to_str(min_limit)}"
2627

2728
return ""
2829

2930

30-
def dlt_generate_is_not_null_or_empty(cl, **params: dict):
31+
def dlt_generate_is_not_null_or_empty(col_name, **params: dict):
3132
trim_strings = params.get("trim_strings", True)
32-
s = f"{cl} is not null and "
33+
msg = f"{col_name} is not null and "
3334
if trim_strings:
34-
s += "trim("
35-
s += cl
35+
msg += "trim("
36+
msg += col_name
3637
if trim_strings:
37-
s += ")"
38-
s += " <> ''"
39-
return s
38+
msg += ")"
39+
msg += " <> ''"
40+
return msg
4041

4142

4243
dlt_mapping = {
43-
"is_not_null": lambda cl, **params: f"{cl} is not null",
44+
"is_not_null": lambda col_name, **params: f"{col_name} is not null",
4445
"is_in": dlt_generate_is_in,
4546
"min_max": dlt_generate_min_max,
4647
"is_not_null_or_empty": dlt_generate_is_not_null_or_empty,
@@ -53,34 +54,34 @@ def generate_dlt_rules_python(rules: list[DQRule], action: str | None = None) ->
5354

5455
expectations = {}
5556
for rule in rules:
56-
nm = rule.name
57-
cl = rule.column
57+
rule_name = rule.name
58+
col_name = rule.column
5859
params = rule.parameters or {}
59-
if nm not in dlt_mapping:
60-
print(f"No rule '{nm}' for column '{cl}'. skipping...")
60+
if rule_name not in dlt_mapping:
61+
print(f"No rule '{rule_name}' for column '{col_name}'. skipping...")
6162
continue
62-
expr = dlt_mapping[nm](cl, **params)
63+
expr = dlt_mapping[rule_name](col_name, **params)
6364
if expr == "":
6465
print("Empty expression was generated for rule '{nm}' for column '{cl}'")
6566
continue
66-
exp_name = re.sub(__name_sanitize_re__, "_", f"{cl}_{nm}")
67+
exp_name = re.sub(__name_sanitize_re__, "_", f"{col_name}_{rule_name}")
6768
expectations[exp_name] = expr
6869

6970
if len(expectations) == 0:
7071
return ""
7172

72-
t = json.dumps(expectations)
73+
json_expectations = json.dumps(expectations)
7374
if action == "drop":
7475
exp_str = f"""@dlt.expect_all_or_drop(
75-
{t}
76+
{json_expectations}
7677
)"""
7778
elif action == "fail":
7879
exp_str = f"""@dlt.expect_all_or_fail(
79-
{t}
80+
{json_expectations}
8081
)"""
8182
else:
8283
exp_str = f"""@dlt.expect_all(
83-
{t}
84+
{json_expectations}
8485
)"""
8586
return exp_str
8687

@@ -96,28 +97,30 @@ def generate_dlt_rules_sql(rules: list[DQRule], action: str | None = None) -> li
9697
elif action == "fail":
9798
act_str = " ON VIOLATION FAIL UPDATE"
9899
for rule in rules:
99-
nm = rule.name
100-
cl = rule.column
100+
rule_name = rule.name
101+
col_name = rule.column
101102
params = rule.parameters or {}
102-
if nm not in dlt_mapping:
103-
print(f"No rule '{nm}' for column '{cl}'. skipping...")
103+
if rule_name not in dlt_mapping:
104+
print(f"No rule '{rule_name}' for column '{col_name}'. skipping...")
104105
continue
105-
expr = dlt_mapping[nm](cl, **params)
106+
expr = dlt_mapping[rule_name](col_name, **params)
106107
if expr == "":
107108
print("Empty expression was generated for rule '{nm}' for column '{cl}'")
108109
continue
109110
# TODO: generate constraint name in lower_case, etc.
110-
dlt_rule = f"CONSTRAINT {cl}_{nm} EXPECT ({expr}){act_str}"
111+
dlt_rule = f"CONSTRAINT {col_name}_{rule_name} EXPECT ({expr}){act_str}"
111112
dlt_rules.append(dlt_rule)
112113

113114
return dlt_rules
114115

115116

116-
def generate_dlt_rules(rules: list[DQRule], action: str | None = None, language: str = "SQL") -> Union[list[str], str]:
117+
def generate_dlt_rules(rules: list[DQRule], action: str | None = None, language: str = "SQL") -> list[str] | str:
117118
lang = language.lower()
119+
118120
if lang == "sql":
119121
return generate_dlt_rules_sql(rules, action)
120-
elif lang == "python":
122+
123+
if lang == "python":
121124
return generate_dlt_rules_python(rules, action)
122-
else:
123-
raise Exception(f"Unsupported language '{language}'")
125+
126+
raise ValueError(f"Unsupported language '{language}'")

src/databricks/labs/dqx/profiler/dq_generator.py

+17-17
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
from typing import Optional
2-
31
from databricks.labs.dqx.profiler.common import val_maybe_to_str
42
from databricks.labs.dqx.profiler.profiler import DQRule
53

@@ -14,41 +12,43 @@ def dq_generate_is_in(col_name: str, level: str = "error", **params: dict):
1412

1513
# TODO: rewrite it
1614
def dq_generate_min_max(col_name: str, level: str = "error", **params: dict):
17-
min = params.get("min")
18-
max = params.get("max")
15+
min_limit = params.get("min")
16+
max_limit = params.get("max")
1917

20-
if min is not None and max is not None:
18+
if min_limit is not None and max_limit is not None:
2119
return {
2220
"check": {
2321
"function": "col_is_in_range",
2422
"arguments": {
2523
"col_name": col_name,
26-
"min_limit": val_maybe_to_str(min, include_sql_quotes=False),
27-
"max_limit": val_maybe_to_str(max, include_sql_quotes=False),
24+
"min_limit": val_maybe_to_str(min_limit, include_sql_quotes=False),
25+
"max_limit": val_maybe_to_str(max_limit, include_sql_quotes=False),
2826
},
2927
},
3028
"name": f"{col_name}_isnt_in_range",
3129
"criticality": level,
3230
}
33-
elif max is not None:
31+
32+
if max_limit is not None:
3433
return {
3534
"check": {
3635
"function": "col_not_greater_than",
3736
"arguments": {
3837
"col_name": col_name,
39-
"val": val_maybe_to_str(max, include_sql_quotes=False),
38+
"val": val_maybe_to_str(max_limit, include_sql_quotes=False),
4039
},
4140
},
4241
"name": f"{col_name}_not_greater_than",
4342
"criticality": level,
4443
}
45-
elif min is not None:
44+
45+
if min_limit is not None:
4646
return {
4747
"check": {
4848
"function": "col_not_less_than",
4949
"arguments": {
5050
"col_name": col_name,
51-
"val": val_maybe_to_str(min, include_sql_quotes=False),
51+
"val": val_maybe_to_str(min_limit, include_sql_quotes=False),
5252
},
5353
},
5454
"name": f"{col_name}_not_less_than",
@@ -58,7 +58,7 @@ def dq_generate_min_max(col_name: str, level: str = "error", **params: dict):
5858
return None
5959

6060

61-
def dq_generate_is_not_null(col_name: str, level: str = "error", **params: dict):
61+
def dq_generate_is_not_null(col_name: str, level: str = "error", **params: dict): # pylint: disable=unused-argument
6262
return {
6363
"check": {"function": "col_is_not_null", "arguments": {"col_name": col_name}},
6464
"name": f"{col_name}_is_null",
@@ -85,18 +85,18 @@ def dq_generate_is_not_null_or_empty(col_name: str, level: str = "error", **para
8585
}
8686

8787

88-
def generate_dq_rules(rules: Optional[list[DQRule]] = None, level: str = "error") -> list[dict]:
88+
def generate_dq_rules(rules: list[DQRule] | None = None, level: str = "error") -> list[dict]:
8989
if rules is None:
9090
rules = []
9191
dq_rules = []
9292
for rule in rules:
93-
nm = rule.name
93+
rule_name = rule.name
9494
col_name = rule.column
9595
params = rule.parameters or {}
96-
if nm not in dq_mapping:
97-
print(f"No rule '{nm}' for column '{col_name}'. skipping...")
96+
if rule_name not in dq_mapping:
97+
print(f"No rule '{rule_name}' for column '{col_name}'. skipping...")
9898
continue
99-
expr = dq_mapping[nm](col_name, level, **params)
99+
expr = dq_mapping[rule_name](col_name, level, **params)
100100
if expr:
101101
dq_rules.append(expr)
102102

0 commit comments

Comments
 (0)