Skip to content

Commit

Permalink
Add range validation
Browse files Browse the repository at this point in the history
  • Loading branch information
David Almeida committed Feb 7, 2025
1 parent fc26782 commit 3a3c650
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 3 deletions.
44 changes: 42 additions & 2 deletions nomenclature/processor/data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas import concat
from pyam import IamDataFrame
from pyam.logging import adjust_log_level
from pydantic import computed_field, field_validator, model_validator
from pydantic import computed_field, field_validator, model_validator, Field

from nomenclature.definition import DataStructureDefinition
from nomenclature.error import ErrorCollector
Expand Down Expand Up @@ -86,9 +86,49 @@ def criteria(self):
)


class DataValidationCriteriaRange(DataValidationCriteria):
    """Validation criteria given as a two-element ``range`` of floats.

    The first element is exposed as ``lower_bound`` and the second as
    ``upper_bound``; a range whose first element exceeds the second is
    rejected when the model is validated.
    """

    # Exactly two entries are accepted (enforced via min_length/max_length).
    range: list[float] = Field(..., min_length=2, max_length=2)

    @model_validator(mode="after")
    def check_range_is_valid(self):
        """Raise ``ValueError`` if the first range entry exceeds the second."""
        lower, upper = self.range
        if lower > upper:
            raise ValueError(f"Validation range is invalid: {self.criteria}")
        return self

    @computed_field
    def upper_bound(self) -> float:
        # Second entry of ``range``.
        _, upper = self.range
        return upper

    @computed_field
    def lower_bound(self) -> float:
        # First entry of ``range``.
        lower, _ = self.range
        return lower

    @property
    def validation_args(self):
        """Attributes used for validation (as bounds)."""
        # Drop the raw ``range`` so only the derived bounds are dumped.
        hidden = ["warning_level", "range"]
        return self.model_dump(
            exclude=hidden,
            exclude_none=True,
            exclude_unset=True,
        )

    @property
    def criteria(self):
        """Dump of the criteria with ``range`` kept and derived bounds dropped."""
        hidden = ["warning_level", "lower_bound", "upper_bound"]
        return self.model_dump(
            exclude=hidden,
            exclude_none=True,
            exclude_unset=True,
        )


class DataValidationCriteriaMultiple(IamcDataFilter):
validation: (
list[DataValidationCriteriaValue | DataValidationCriteriaBounds] | None
list[
DataValidationCriteriaValue
| DataValidationCriteriaBounds
| DataValidationCriteriaRange
]
| None
) = None

@model_validator(mode="after")
Expand Down
11 changes: 11 additions & 0 deletions tests/data/validation/validate_data/validate_warning_range.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
- variable: Primary Energy
year: 2010
validation:
- range: [ 1, 5 ]
- warning_level: low
upper_bound: 2.5
lower_bound: 1
- variable: Primary Energy|Coal
year: 2010
upper_bound: 5
lower_bound: 1
8 changes: 7 additions & 1 deletion tests/test_validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def test_DataValidator_apply_fails(simple_df, file, item_1, item_2, item_3, capl

@pytest.mark.parametrize(
"file, value",
[("joined", 6.0), ("joined", 3.0), ("legacy", 6.0)],
[("joined", 6.0), ("joined", 3.0), ("legacy", 6.0), ("range", 6.0)],
)
def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
"""Checks that failed validation rows are printed in log."""
Expand All @@ -154,6 +154,7 @@ def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
0 model_a scen_a World Primary Energy EJ/yr 2010 6.0 error
1 model_a scen_b World Primary Energy EJ/yr 2010 7.0 error"""
)

if file == "legacy":
# prints both error and low warning levels for legacy format
# because these are treated as independent validation-criteria
Expand All @@ -164,6 +165,11 @@ def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
0 model_a scen_a World Primary Energy EJ/yr 2010 6.0 low
1 model_a scen_b World Primary Energy EJ/yr 2010 7.0 low"""

if file == "range":
failed_validation_message = failed_validation_message.replace(
"upper_bound: 5.0, lower_bound: 1.0", "range: [1.0, 5.0]"
)

if value == 3.0:
# prints each warning level when each is triggered by different rows
failed_validation_message = """
Expand Down

0 comments on commit 3a3c650

Please sign in to comment.