Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix wildcard variable aggregation #446

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,11 +595,15 @@ def check_weight_in_vars(cls, v):
)
return v

def vars_default_args(self, variables: list[str]) -> list[VariableCode]:
def vars_default_args(self, variables: list[str]) -> list[str]:
"""return subset of variables which does not feature any special pyam
aggregation arguments and where skip_region_aggregation is False"""
# this is where the problem lies
# Say we have a variable code "Primary Energy*" and a variable in `variables` called "Primary Energy|1".
# A direct lookup will never work, since "Primary Energy*" is not the same string as "Primary Energy|1",
# but we still need to uniquely identify which variable pattern we are matching in order to read its
# aggregation information (agg_kwargs) and skip_region_aggregation.
# Therefore we need to perform a wildcard match of `var` against `self`, look up the result
# (WHICH HAS TO BE UNIQUE -> https://github.com/IAMconsortium/nomenclature/issues/432), and read
# VariableCode.agg_kwargs and VariableCode.skip_region_aggregation from it.
return [
self[var]
var
for var in variables
if not self[var].agg_kwargs and not self[var].skip_region_aggregation
]
Expand All @@ -621,8 +625,7 @@ def validate_units(
if invalid_units := [
(variable, unit, self.mapping[variable].unit)
for variable, unit in unit_mapping.items()
if variable in self.variables
and unit not in self.mapping[variable].units
if variable in self.variables and unit not in self.mapping[variable].units
]:
lst = [
f"'{v}' - expected: {'one of ' if isinstance(e, list) else ''}"
Expand Down
2 changes: 1 addition & 1 deletion nomenclature/processor/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ def _apply_region_processing(

# first, perform 'simple' aggregation (no arguments)
simple_vars = [
var.name
var
for var in self.variable_codelist.vars_default_args(
model_df.variable
)
Expand Down
3 changes: 3 additions & 0 deletions tests/data/region_processing/dsd/variable/variables.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
- Primary Energy|Coal:
definition: Primary energy consumption of Coal
unit: EJ/yr
- Primary Energy|*:
definition: Any other variable
unit: EJ/yr
- Share|Coal:
definition: Share of Coal in the total primary energy mix
unit:
Expand Down
13 changes: 10 additions & 3 deletions tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,16 @@ def test_illegal_char_ignores_external_repo():
"""Check that external repos are excluded from this check."""
# the config includes illegal characters known to be in common-definitions
# the test will not raise errors as the check is skipped for external repos
DataStructureDefinition(
MODULE_TEST_DATA_DIR / "illegal_chars" / "char_in_external_repo" / "definitions"
)

try:
dsd = DataStructureDefinition(
MODULE_TEST_DATA_DIR
/ "illegal_chars"
/ "char_in_external_repo"
/ "definitions"
)
finally:
clean_up_external_repos(dsd.config.repositories)


def test_end_whitespace_fails():
Expand Down
36 changes: 36 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,42 @@ def test_region_processing_aggregate():
assert_iamframe_equal(obs, exp)


def test_region_processing_aggregate_wildcard():
# Test only the aggregation feature
test_df = IamDataFrame(
pd.DataFrame(
[
["model_a", "scen_a", "region_A", "Primary Energy|1", "EJ/yr", 1, 2],
["model_a", "scen_a", "region_B", "Primary Energy|1", "EJ/yr", 3, 4],
["model_a", "scen_a", "region_A", "Primary Energy|2", "EJ/yr", 5, 6],
["model_a", "scen_a", "region_B", "Primary Energy|2", "EJ/yr", 7, 8],
],
columns=IAMC_IDX + [2005, 2010],
)
)
# add_meta(test_df)

exp = IamDataFrame(
pd.DataFrame(
[
["model_a", "scen_a", "World", "Primary Energy|1", "EJ/yr", 4, 6],
["model_a", "scen_a", "World", "Primary Energy|2", "EJ/yr", 12, 14],
],
columns=IAMC_IDX + [2005, 2010],
)
)
# add_meta(exp)
obs = process(
test_df,
dsd := DataStructureDefinition(TEST_DATA_DIR / "region_processing/dsd"),
processor=RegionProcessor.from_directory(
TEST_DATA_DIR / "region_processing/aggregate_only", dsd
),
)

assert_iamframe_equal(obs, exp)


@pytest.mark.parametrize(
"directory", ("complete_processing", "complete_processing_list")
)
Expand Down
Loading