diff --git a/docs/user_guide/config.rst b/docs/user_guide/config.rst index 57f3db8e..65ef0634 100644 --- a/docs/user_guide/config.rst +++ b/docs/user_guide/config.rst @@ -152,3 +152,25 @@ validation: - region - variable - scenario + + +Filter model mappings from external repositories +------------------------------------------------ + +We often only want to use a subset of models in a particular project (and not import all mappings), so +there is an option to filter for specific model mappings. This works very similarly to +the filtering for definitions. + +.. code:: yaml + + repositories: + common-definitions: + url: https://github.com/IAMconsortium/common-definitions.git/ + mappings: + repository: + name: common-definitions + include: + - MESSAGEix-GLOBIOM 2.1-M-R12 + +The above example retrieves only the model mapping for *MESSAGEix-GLOBIOM 2.1-M-R12* +from the common-definitions repository. diff --git a/nomenclature/code.py b/nomenclature/code.py index 415b308f..24ba9bcf 100644 --- a/nomenclature/code.py +++ b/nomenclature/code.py @@ -112,6 +112,10 @@ def flattened_dict_serialized(self): def depth(self) -> int: return self.name.count("|") + @property + def from_external_repository(self) -> bool: + return self.repository is not None + def replace_tag(self, tag: str, target: "Code") -> "Code": """Return a new instance with tag applied diff --git a/nomenclature/config.py b/nomenclature/config.py index fb5272c2..24c7eb52 100644 --- a/nomenclature/config.py +++ b/nomenclature/config.py @@ -209,6 +209,21 @@ def repos(self) -> dict[str, str]: class MappingRepository(BaseModel): name: str + include: list[str] = ["*"] + + @property + def regex_include_patterns(self): + return [re.compile(escape_regexp(pattern) + "$") for pattern in self.include] + + def match_models(self, models: list[str]) -> list[str]: + """Return the models matching at least one include pattern, each listed once""" + # fetch the patterns once; the property recompiles them on every access + patterns = self.regex_include_patterns + return [ + model + for model in models + if any(pattern.match(model) for pattern in patterns) + ] class 
RegionMappingConfig(BaseModel): diff --git a/nomenclature/processor/region.py b/nomenclature/processor/region.py index 976223d6..c3255836 100644 --- a/nomenclature/processor/region.py +++ b/nomenclature/processor/region.py @@ -13,10 +13,12 @@ AfterValidator, BaseModel, ConfigDict, + Field, ValidationInfo, field_validator, model_validator, validate_call, + field_serializer, ) from pydantic.types import DirectoryPath, FilePath from pydantic_core import PydanticCustomError @@ -109,23 +111,11 @@ class RegionAggregationMapping(BaseModel): model: list[str] file: FilePath - native_regions: list[NativeRegion] | None = None - common_regions: list[CommonRegion] | None = None - exclude_regions: list[str] | None = None + native_regions: list[NativeRegion] = Field(default_factory=list) + common_regions: list[CommonRegion] = Field(default_factory=list) + exclude_regions: list[str] = Field(default_factory=list) - @model_validator(mode="before") - @classmethod - def check_no_additional_attributes(cls, v): - if illegal_additional_attributes := [ - input_attribute - for input_attribute in v.keys() - if input_attribute not in cls.model_fields - ]: - raise ValueError( - "Illegal attributes in 'RegionAggregationMapping': " - f"{illegal_additional_attributes} (file {v['file']})" - ) - return v + model_config = ConfigDict(extra="forbid") @field_validator("model", mode="before") @classmethod @@ -188,7 +178,7 @@ def check_native_or_common_regions( cls, v: "RegionAggregationMapping" ) -> "RegionAggregationMapping": # Check that we have at least one of the two: native and common regions - if v.native_regions is None and v.common_regions is None: + if not v.native_regions and not v.common_regions: raise ValueError( "At least one of 'native_regions' and 'common_regions' must be " f"provided in {v.file}" @@ -201,9 +191,7 @@ def check_illegal_renaming( cls, v: "RegionAggregationMapping" ) -> "RegionAggregationMapping": """Check if any renaming overlaps with common regions""" - # Skip if only 
either native-regions or common-regions are specified - if v.native_regions is None or v.common_regions is None: - return v + native_region_names = {nr.target_native_region for nr in v.native_regions} common_region_names = {cr.name for cr in v.common_regions} overlap = list(native_region_names & common_region_names) @@ -233,7 +221,7 @@ def check_exclude_common_region_overlap( return _check_exclude_region_overlap(v, "common_regions") @classmethod - def from_file(cls, file: Path | str): + def from_file(cls, file: Path | str) -> "RegionAggregationMapping": """Initialize a RegionAggregationMapping from a file. Parameters @@ -380,6 +368,10 @@ def upload_native_regions(self) -> list[str]: def reverse_rename_mapping(self) -> dict[str, str]: return {renamed: original for original, renamed in self.rename_mapping.items()} + @property + def models(self) -> list[str]: + return self.model + def check_unexpected_regions(self, df: IamDataFrame) -> None: # Raise error if a region in the input data is not used in the model mapping @@ -404,28 +396,35 @@ def check_unexpected_regions(self, df: IamDataFrame) -> None: def __eq__(self, other: "RegionAggregationMapping") -> bool: return self.model_dump(exclude={"file"}) == other.model_dump(exclude={"file"}) + @field_serializer("model", when_used="json") + def serialize_model(self, model) -> str | list[str]: + return model[0] if len(model) == 1 else model + + @field_serializer("native_regions", when_used="json") + def serialize_native_regions(self, native_regions) -> list: + return [ + ( + {native_region.name: native_region.rename} + if native_region.rename + else native_region.name + ) + for native_region in native_regions + ] + + @field_serializer("common_regions", when_used="json") + def serialize_common_regions(self, common_regions) -> list: + return [ + {common_region.name: common_region.constituent_regions} + for common_region in common_regions + ] + def to_yaml(self, file) -> None: - dict_representation = { - "model": self.model[0] 
if len(self.model) == 1 else self.model - } - if self.native_regions: - dict_representation["native_regions"] = [ - ( - {native_region.name: native_region.rename} - if native_region.rename - else native_region.name - ) - for native_region in self.native_regions - ] - if self.common_regions: - dict_representation["common_regions"] = [ - {common_region.name: common_region.constituent_regions} - for common_region in self.common_regions - ] - if self.exclude_regions: - dict_representation["exclude_regions"] = self.exclude_regions with open(file, "w", encoding="utf-8") as f: - yaml.dump(dict_representation, f, sort_keys=False) + yaml.dump( + self.model_dump(mode="json", exclude_defaults=True, exclude={"file"}), + f, + sort_keys=False, + ) def validate_with_definition(v: RegionAggregationMapping, info: ValidationInfo): @@ -479,21 +478,31 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition): mapping_dict: dict[str, RegionAggregationMapping] = {} errors = ErrorCollector() - mapping_files = [f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}] + mapping_files = [mapping_file for mapping_file in path.glob("**/*.y*ml")] + # Read model mappings from external repositories for repository in dsd.config.mappings.repositories: - mapping_files.extend( - f - for f in ( - dsd.config.repositories[repository.name].local_path / "mappings" - ).glob("**/*") - if f.suffix in {".yaml", ".yml"} - ) + for mapping_file in ( + dsd.config.repositories[repository.name].local_path / "mappings" + ).glob("**/*.y*ml"): + mapping = RegionAggregationMapping.from_file(mapping_file) + for model in repository.match_models(mapping.models): + if model not in mapping_dict: + mapping_dict[model] = mapping + else: + errors.append( + ValueError( + "Multiple region aggregation mappings for " + f"model {model} in [{mapping.file}, " + f"{mapping_dict[model].file}]" + ) + ) - for file in mapping_files: + # Read model mappings from the local repository + for mapping_file in 
mapping_files: try: - mapping = RegionAggregationMapping.from_file(file) - for model in mapping.model: + mapping = RegionAggregationMapping.from_file(mapping_file) + for model in mapping.models: if model not in mapping_dict: mapping_dict[model] = mapping else: @@ -620,70 +629,64 @@ def _apply_region_processing( # silence pyam's empty filter warnings with adjust_log_level(logger="pyam", level="ERROR"): # rename native regions - if self.mappings[model].native_regions is not None: - _df = model_df.filter( - region=self.mappings[model].model_native_region_names + _df = model_df.filter(region=self.mappings[model].model_native_region_names) + if not _df.empty: + _processed_data.append( + _df.rename(region=self.mappings[model].rename_mapping)._data ) - if not _df.empty: - _processed_data.append( - _df.rename(region=self.mappings[model].rename_mapping)._data - ) # aggregate common regions - if self.mappings[model].common_regions is not None: - for common_region in self.mappings[model].common_regions: - # if a common region is consists of a single native region, rename - if common_region.is_single_constituent_region: - _df = model_df.filter( - region=common_region.constituent_regions[0] - ).rename(region=common_region.rename_dict) - if not _df.empty: - _processed_data.append(_df._data) - continue + for common_region in self.mappings[model].common_regions: + # if a common region consists of a single native region, rename + if common_region.is_single_constituent_region: + _df = model_df.filter( + region=common_region.constituent_regions[0] + ).rename(region=common_region.rename_dict) + if not _df.empty: + _processed_data.append(_df._data) + continue - # if there are multiple constituent regions, aggregate - regions = [common_region.name, common_region.constituent_regions] + # if there are multiple constituent regions, aggregate + regions = [common_region.name, common_region.constituent_regions] - # first, perform 'simple' aggregation (no arguments) - simple_vars = [ - var 
- for var in self.variable_codelist.vars_default_args( - model_df.variable - ) - ] - _df = model_df.aggregate_region( - simple_vars, - *regions, + # first, perform 'simple' aggregation (no arguments) + simple_vars = [ + var + for var in self.variable_codelist.vars_default_args( + model_df.variable ) - if _df is not None and not _df.empty: - _processed_data.append(_df._data) - - # second, special weighted aggregation - for var in self.variable_codelist.vars_kwargs(model_df.variable): - if var.region_aggregation is None: - _df = _aggregate_region( - model_df, - var.name, - *regions, - **var.pyam_agg_kwargs, - ) - if _df is not None and not _df.empty: - _processed_data.append(_df._data) - else: - for rename_var in var.region_aggregation: - for _rename, _kwargs in rename_var.items(): - _df = _aggregate_region( - model_df, - var.name, - *regions, - **_kwargs, + ] + _df = model_df.aggregate_region( + simple_vars, + *regions, + ) + if _df is not None and not _df.empty: + _processed_data.append(_df._data) + + # second, special weighted aggregation + for var in self.variable_codelist.vars_kwargs(model_df.variable): + if var.region_aggregation is None: + _df = _aggregate_region( + model_df, + var.name, + *regions, + **var.pyam_agg_kwargs, + ) + if _df is not None and not _df.empty: + _processed_data.append(_df._data) + else: + for rename_var in var.region_aggregation: + for _rename, _kwargs in rename_var.items(): + _df = _aggregate_region( + model_df, + var.name, + *regions, + **_kwargs, + ) + if _df is not None and not _df.empty: + _processed_data.append( + _df.rename(variable={var.name: _rename})._data ) - if _df is not None and not _df.empty: - _processed_data.append( - _df.rename( - variable={var.name: _rename} - )._data - ) common_region_df = model_df.filter( region=self.mappings[model].common_region_names, diff --git a/tests/data/config/filter_mappings.yaml b/tests/data/config/filter_mappings.yaml new file mode 100644 index 00000000..90a95fb7 --- /dev/null +++ 
b/tests/data/config/filter_mappings.yaml @@ -0,0 +1,9 @@ +repositories: + common-definitions: + url: https://github.com/IAMconsortium/common-definitions.git/ + hash: 091c0fe +mappings: + repository: + name: common-definitions + include: + - MESSAGEix-GLOBIOM 2.1-M-R12 diff --git a/tests/data/region_processing/external_repo_test/nomenclature.yaml b/tests/data/region_processing/external_repo_test/nomenclature.yaml index 1945b452..aae2198a 100644 --- a/tests/data/region_processing/external_repo_test/nomenclature.yaml +++ b/tests/data/region_processing/external_repo_test/nomenclature.yaml @@ -11,4 +11,7 @@ definitions: variable: repository: common-definitions mappings: - repository: common-definitions + repository: + name: common-definitions + include: + - REMIND-MAgPIE 3.1-4.6 diff --git a/tests/test_config.py b/tests/test_config.py index 1385bfa5..73f9efa8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,10 +2,7 @@ import pytest from pytest import raises -from nomenclature.config import ( - Repository, - NomenclatureConfig, -) +from nomenclature.config import Repository, NomenclatureConfig, MappingRepository from conftest import TEST_DATA_DIR, clean_up_external_repos @@ -93,3 +90,18 @@ def test_config_with_filter(config_file): assert isinstance(config.definitions.variable.repositories, list) finally: clean_up_external_repos(config.repositories) + + +def test_config_external_repo_mapping_filter(): + + config = NomenclatureConfig.from_file( + TEST_DATA_DIR / "config" / "filter_mappings.yaml" + ) + exp = MappingRepository( + name="common-definitions", include=["MESSAGEix-GLOBIOM 2.1-M-R12"] + ) + try: + assert isinstance(config.mappings.repositories, list) + assert config.mappings.repositories[0] == exp + finally: + clean_up_external_repos(config.repositories) diff --git a/tests/test_region_aggregation.py b/tests/test_region_aggregation.py index 80ed54cb..85b16555 100644 --- a/tests/test_region_aggregation.py +++ b/tests/test_region_aggregation.py @@ 
-43,7 +43,7 @@ def test_mapping(): "constituent_regions": ["region_c"], }, ], - "exclude_regions": None, + "exclude_regions": [], } assert obs.model_dump() == exp @@ -51,10 +51,6 @@ def test_mapping(): @pytest.mark.parametrize( "file, error_msg_pattern", [ - ( - "illegal_mapping_illegal_attribute.yaml", - "Illegal attributes in 'RegionAggregationMapping'", - ), ( "illegal_mapping_conflict_regions.yaml", "Name collision in native and common regions.*common_region_1", @@ -92,6 +88,15 @@ def test_illegal_mappings(file, error_msg_pattern): RegionAggregationMapping.from_file(TEST_FOLDER_REGION_AGGREGATION / file) +def test_illegal_additional_attribute(): + with pytest.raises( + pydantic.ValidationError, match="Extra inputs are not permitted" + ): + RegionAggregationMapping.from_file( + TEST_FOLDER_REGION_AGGREGATION / "illegal_mapping_illegal_attribute.yaml" + ) + + def test_mapping_parsing_error(): with pytest.raises(ValueError, match="string indices must be integers"): RegionAggregationMapping.from_file( @@ -119,15 +124,15 @@ def test_region_processor_working(region_processor_path, simple_definition): "native_regions": [ {"name": "World", "rename": None}, ], - "common_regions": None, - "exclude_regions": None, + "common_regions": [], + "exclude_regions": [], }, { "model": ["model_b"], "file": ( TEST_FOLDER_REGION_PROCESSING / "regionprocessor_working/mapping_2.yaml" ).relative_to(Path.cwd()), - "native_regions": None, + "native_regions": [], "common_regions": [ { "name": "World", @@ -239,7 +244,7 @@ def test_region_processor_unexpected_region_raises(): def test_mapping_from_external_repository(): - # This test reads both mappings and definitions from an external repository only + # This test reads definitions and only the mapping for REMIND-MAgPIE 3.1-4.6 # from an external repository only try: processor = RegionProcessor.from_directory( TEST_FOLDER_REGION_PROCESSING / "external_repo_test" / "mappings", @@ -247,11 +252,7 @@ def 
test_mapping_from_external_repository(): TEST_FOLDER_REGION_PROCESSING / "external_repo_test" / "definitions" ), ) - - assert all( - model in processor.mappings.keys() - for model in ("REMIND 3.1", "REMIND-MAgPIE 3.1-4.6") - ) + assert {"REMIND-MAgPIE 3.1-4.6"} == set(processor.mappings.keys()) finally: clean_up_external_repos(dsd.config.repositories)