Skip to content

Commit 965ce96

Browse files
committed
Revert "Makes data validator API consistent with other type-checking"
This reverts commit d903745.
1 parent 5cea4a7 commit 965ce96

File tree

4 files changed

43 additions and 51 deletions

4 files changed

43 additions and 51 deletions

hamilton/data_quality/base.py

+5-19
Original file line number | Diff line number | Diff line change
@@ -35,27 +35,13 @@ def __init__(self, importance: str):
3535
def importance(self) -> DataValidationLevel:
3636
return self._importance
3737

38-
@classmethod
39-
def applies_to(cls, datatype: Type[Type]) -> bool:
40-
"""Whether or not this data validator can apply to the specified dataset.
41-
Note that overriding this is not the intended API (it was the old one),
42-
but this will be a stable part of the API moving forward, at least until
43-
Hamilton 2.0.
38+
@abc.abstractmethod
39+
def applies_to(self, datatype: Type[Type]) -> bool:
40+
"""Whether or not this data validator can apply to the specified dataset
4441
45-
:param datatype: Datatype to validate.
42+
:param datatype:
4643
:return: True if it can be run on the specified type, false otherwise
4744
"""
48-
for type_ in cls.applicable_types():
49-
if type_ == Any or issubclass(type_, datatype):
50-
return True
51-
return False
52-
53-
@classmethod
54-
def applicable_types(cls) -> List[type]:
55-
"""Returns the list of classes for which this is valid.
56-
57-
:return: List of classes
58-
"""
5945
pass
6046

6147
@abc.abstractmethod
@@ -132,7 +118,7 @@ def __init__(self, importance: str):
132118

133119
@classmethod
134120
@abc.abstractmethod
135-
def applicable_types(cls) -> List[type]:
121+
def applies_to(cls, datatype: Type[Type]) -> bool:
136122
pass
137123

138124
@abc.abstractmethod

hamilton/data_quality/default_validators.py

+22-20
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,8 @@ def arg(cls) -> str:
2424
return "range"
2525

2626
@classmethod
27-
def applicable_types(cls) -> List[type]:
28-
return [pd.Series]
27+
def applies_to(cls, datatype: Type[Type]) -> bool:
28+
return issubclass(datatype, pd.Series) # TODO -- handle dataframes?
2929

3030
def description(self) -> str:
3131
return f"Validates that the datapoint falls within the range ({self.range[0]}, {self.range[1]})"
@@ -69,8 +69,8 @@ def arg(cls) -> str:
6969
return "values_in"
7070

7171
@classmethod
72-
def applicable_types(cls) -> List[type]:
73-
return [pd.Series]
72+
def applies_to(cls, datatype: Type[Type]) -> bool:
73+
return issubclass(datatype, pd.Series) # TODO -- handle dataframes?
7474

7575
def description(self) -> str:
7676
return f"Validates that all data points are from a fixed set of values: ({self.values}), ignoring NA values."
@@ -113,8 +113,8 @@ def __init__(self, range: Tuple[numbers.Real, numbers.Real], importance: str):
113113
self.range = range
114114

115115
@classmethod
116-
def applicable_types(cls) -> List[type]:
117-
return [numbers.Real]
116+
def applies_to(cls, datatype: Type[Type]) -> bool:
117+
return issubclass(datatype, numbers.Real)
118118

119119
def description(self) -> str:
120120
return f"Validates that the datapoint falls within the range ({self.range[0]}, {self.range[1]})"
@@ -151,8 +151,10 @@ def arg(cls) -> str:
151151
return "values_in"
152152

153153
@classmethod
154-
def applicable_types(cls) -> List[type]:
155-
return [numbers.Real, str]
154+
def applies_to(cls, datatype: Type[Type]) -> bool:
155+
return issubclass(datatype, numbers.Real) or issubclass(
156+
datatype, str
157+
) # TODO support list, dict and typing.* variants
156158

157159
def description(self) -> str:
158160
return f"Validates that python values are from a fixed set of values: ({self.values})."
@@ -187,8 +189,8 @@ def _to_percent(fraction: float):
187189
return "{0:.2%}".format(fraction)
188190

189191
@classmethod
190-
def applicable_types(cls) -> List[type]:
191-
return [pd.Series]
192+
def applies_to(cls, datatype: Type[Type]) -> bool:
193+
return issubclass(datatype, pd.Series)
192194

193195
def description(self) -> str:
194196
return f"Validates that no more than {MaxFractionNansValidatorPandasSeries._to_percent(self.max_fraction_nans)} of the data is Nan."
@@ -249,8 +251,8 @@ def __init__(self, data_type: Type[Type], importance: str):
249251
self.datatype = data_type
250252

251253
@classmethod
252-
def applicable_types(cls) -> List[type]:
253-
return [pd.Series]
254+
def applies_to(cls, datatype: Type[Type]) -> bool:
255+
return issubclass(datatype, pd.Series)
254256

255257
def description(self) -> str:
256258
return f"Validates that the datatype of the pandas series is a subclass of: {self.datatype}"
@@ -280,8 +282,8 @@ def __init__(self, data_type: Type[Type], importance: str):
280282
self.datatype = data_type
281283

282284
@classmethod
283-
def applicable_types(cls) -> List[type]:
284-
return [numbers.Real, str, bool, int, float, list, dict]
285+
def applies_to(cls, datatype: Type[Type]) -> bool:
286+
return issubclass(datatype, numbers.Real) or datatype in (str, bool)
285287

286288
def description(self) -> str:
287289
return f"Validates that the datatype of the pandas series is a subclass of: {self.datatype}"
@@ -310,8 +312,8 @@ def __init__(self, max_standard_dev: float, importance: str):
310312
self.max_standard_dev = max_standard_dev
311313

312314
@classmethod
313-
def applicable_types(cls) -> List[type]:
314-
return [pd.Series]
315+
def applies_to(cls, datatype: Type[Type]) -> bool:
316+
return issubclass(datatype, pd.Series)
315317

316318
def description(self) -> str:
317319
return f"Validates that the standard deviation of a pandas series is no greater than : {self.max_standard_dev}"
@@ -338,8 +340,8 @@ def __init__(self, mean_in_range: Tuple[float, float], importance: str):
338340
self.mean_in_range = mean_in_range
339341

340342
@classmethod
341-
def applicable_types(cls) -> List[type]:
342-
return [pd.Series]
343+
def applies_to(cls, datatype: Type[Type]) -> bool:
344+
return issubclass(datatype, pd.Series)
343345

344346
def description(self) -> str:
345347
return f"Validates that a pandas series has mean in range [{self.mean_in_range[0]}, {self.mean_in_range[1]}]"
@@ -366,8 +368,8 @@ def __init__(self, allow_none: bool, importance: str):
366368
self.allow_none = allow_none
367369

368370
@classmethod
369-
def applicable_types(cls) -> List[type]:
370-
return [Any]
371+
def applies_to(cls, datatype: Type[Type]) -> bool:
372+
return True
371373

372374
def description(self) -> str:
373375
if self.allow_none:

hamilton/data_quality/pandera_validators.py

+9-5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from typing import List
1+
from typing import Type
22

33
import pandas as pd
44
import pandera as pa
@@ -14,8 +14,10 @@ def __init__(self, schema: pa.DataFrameSchema, importance: str):
1414
self.schema = schema
1515

1616
@classmethod
17-
def applicable_types(cls) -> List[type]:
18-
return [pd.DataFrame]
17+
def applies_to(cls, datatype: Type[Type]) -> bool:
18+
return issubclass(
19+
datatype, pd.DataFrame
20+
) # TODO -- allow for modin, etc. as they come for free with pandera
1921

2022
def description(self) -> str:
2123
return "Validates that the returned dataframe matches the pander"
@@ -52,8 +54,10 @@ def __init__(self, schema: pa.SeriesSchema, importance: str):
5254
self.schema = schema
5355

5456
@classmethod
55-
def applicable_types(cls) -> List[type]:
56-
return [pd.Series]
57+
def applies_to(cls, datatype: Type[Type]) -> bool:
58+
return issubclass(
59+
datatype, pd.Series
60+
) # TODO -- allow for modin, etc. as they come for free with pandera
5761

5862
def description(self) -> str:
5963
pass

tests/resources/dq_dummy_examples.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from typing import List
1+
from typing import Type
22

33
import pandas as pd
44

@@ -11,8 +11,8 @@ def __init__(self, equal_to: int, importance: str):
1111
self.equal_to = equal_to
1212

1313
@classmethod
14-
def applicable_types(cls) -> List[type]:
15-
return [int]
14+
def applies_to(cls, datatype: Type[Type]) -> bool:
15+
return datatype == int
1616

1717
def description(self) -> str:
1818
return "Data must be equal to 10 to be valid"
@@ -60,8 +60,8 @@ def validate(self, dataset: pd.Series) -> ValidationResult:
6060
)
6161

6262
@classmethod
63-
def applicable_types(cls) -> List[type]:
64-
return [pd.Series]
63+
def applies_to(cls, datatype: Type[Type]) -> bool:
64+
return datatype == pd.Series
6565

6666
@classmethod
6767
def arg(cls) -> str:
@@ -92,8 +92,8 @@ def validate(self, dataset: pd.Series) -> ValidationResult:
9292
)
9393

9494
@classmethod
95-
def applicable_types(cls) -> List[type]:
96-
return [pd.Series]
95+
def applies_to(cls, datatype: Type[Type]) -> bool:
96+
return datatype == pd.Series
9797

9898
@classmethod
9999
def arg(cls) -> str:

0 commit comments

Comments
 (0)