From f4627d9ca05537c3a035cfbae7ec824db7471ba6 Mon Sep 17 00:00:00 2001 From: Philip Hackstock <20710924+phackstock@users.noreply.github.com> Date: Fri, 9 Feb 2024 09:16:32 +0100 Subject: [PATCH] Allow nomenclatureconfig to have region country only (#320) * Remove None as default * Use NomenclatureConfig as default * Use NomenclatureConfig default object * Clean up imports * Remove suppress and use NomenclatureConfig default * Add test for nomenclature definition region only --- nomenclature/codelist.py | 87 ++++++++----------- nomenclature/config.py | 23 +++-- nomenclature/definition.py | 6 +- nomenclature/processor/region.py | 17 ++-- .../nomenclature.yaml | 3 + tests/test_definition.py | 8 ++ 6 files changed, 72 insertions(+), 72 deletions(-) create mode 100644 tests/data/general-config-only-country/nomenclature.yaml diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py index 9c8e1580..b26fea85 100644 --- a/nomenclature/codelist.py +++ b/nomenclature/codelist.py @@ -1,4 +1,3 @@ -from contextlib import suppress import logging from pathlib import Path from typing import ClassVar, Dict, List @@ -6,15 +5,14 @@ import numpy as np import pandas as pd import yaml -from pyam.utils import write_sheet -from pydantic import field_validator, BaseModel, ValidationInfo +from pyam.utils import is_list_like, write_sheet +from pydantic import BaseModel, ValidationInfo, field_validator from pydantic_core import PydanticCustomError import nomenclature from nomenclature.code import Code, MetaCode, RegionCode, VariableCode -from nomenclature.config import NomenclatureConfig -from nomenclature.error import custom_pydantic_errors, ErrorCollector -from pyam.utils import is_list_like +from nomenclature.config import CodeListConfig, NomenclatureConfig +from nomenclature.error import ErrorCollector, custom_pydantic_errors here = Path(__file__).parent.absolute() @@ -198,21 +196,14 @@ def from_directory( """ code_list = cls._parse_codelist_dir(path, file_glob_pattern) - - with suppress(AttributeError): - dimension = path.name - codelistconfig = getattr(config.definitions, dimension) - for repo in codelistconfig.repositories: - repo_path = ( - config.repositories[repo].local_path / "definitions" / dimension - ) - code_list = ( - cls._parse_codelist_dir( - repo_path, - file_glob_pattern, - ) - + code_list - ) + config = config or NomenclatureConfig() + for repo in getattr( + config.definitions, name.lower(), CodeListConfig() + ).repositories: + repo_path = config.repositories[repo].local_path / "definitions" / name + code_list = ( + cls._parse_codelist_dir(repo_path, file_glob_pattern) + code_list + ) errors = ErrorCollector() mapping: Dict[str, Code] = {} for code in code_list: @@ -604,35 +595,33 @@ def from_directory( code_list: List[RegionCode] = [] # initializing from general configuration - with suppress(AttributeError): - # adding all countries - if config.definitions.region.country is True: - for c in nomenclature.countries: - try: - code_list.append( - RegionCode( - name=c.name, iso3_codes=c.alpha_3, hierarchy="Country" - ) + # adding all countries + config = config or NomenclatureConfig() + if config.definitions.region.country is True: + for c in nomenclature.countries: + try: + code_list.append( + RegionCode( + name=c.name, iso3_codes=c.alpha_3, hierarchy="Country" ) - # special handling for countries that do not have an alpha_3 code - except AttributeError: - code_list.append(RegionCode(name=c.name, hierarchy="Country")) - - # importing from an external repository - for repo in config.definitions.region.repositories: - repo_path = ( - config.repositories[repo].local_path / "definitions" / "region" - ) - - code_list = cls._parse_region_code_dir( - code_list, - repo_path, - file_glob_pattern, - repository=config.definitions.region.repositories, - ) - code_list = cls._parse_and_replace_tags( - code_list, repo_path, file_glob_pattern - ) + ) + # special handling for countries that do not have an alpha_3 code + except AttributeError: + code_list.append(RegionCode(name=c.name, hierarchy="Country")) + + # importing from an external repository + for repo in config.definitions.region.repositories: + repo_path = config.repositories[repo].local_path / "definitions" / "region" + + code_list = cls._parse_region_code_dir( + code_list, + repo_path, + file_glob_pattern, + repository=config.definitions.region.repositories, + ) + code_list = cls._parse_and_replace_tags( + code_list, repo_path, file_glob_pattern + ) # parse from current repository code_list = cls._parse_region_code_dir(code_list, path, file_glob_pattern) diff --git a/nomenclature/config.py b/nomenclature/config.py index 3971c1d8..0bfb112f 100644 --- a/nomenclature/config.py +++ b/nomenclature/config.py @@ -27,9 +27,9 @@ def convert_to_set(v: str | list[str] | set[str]) -> set[str]: class CodeListConfig(BaseModel): - dimension: str - repositories: Annotated[set[str] | None, BeforeValidator(convert_to_set)] = Field( - None, alias="repository" + dimension: str | None = None + repositories: Annotated[set[str], BeforeValidator(convert_to_set)] = Field( + default_factory=set, alias="repository" ) model_config = ConfigDict(populate_by_name=True) @@ -94,8 +94,8 @@ class DataStructureConfig(BaseModel): """ - region: Optional[RegionCodeListConfig] = None - variable: Optional[CodeListConfig] = None + region: Optional[RegionCodeListConfig] = Field(default_factory=RegionCodeListConfig) + variable: Optional[CodeListConfig] = Field(default_factory=CodeListConfig) @field_validator("region", "variable", mode="before") @classmethod @@ -107,30 +107,29 @@ def repos(self) -> dict[str, str]: return { dimension: getattr(self, dimension).repositories for dimension in ("region", "variable") - if getattr(self, dimension) and getattr(self, dimension).repositories + if getattr(self, dimension).repositories } class RegionMappingConfig(BaseModel): repositories: Annotated[set[str], BeforeValidator(convert_to_set)] = Field( - ..., alias="repository" + default_factory=set, alias="repository" ) model_config = ConfigDict(populate_by_name=True) class NomenclatureConfig(BaseModel): - repositories: dict[str, Repository] = {} - definitions: Optional[DataStructureConfig] = None - mappings: Optional[RegionMappingConfig] = None + repositories: dict[str, Repository] = Field(default_factory=dict) + definitions: DataStructureConfig = Field(default_factory=DataStructureConfig) + mappings: RegionMappingConfig = Field(default_factory=RegionMappingConfig) @model_validator(mode="after") @classmethod def check_definitions_repository( cls, v: "NomenclatureConfig" ) -> "NomenclatureConfig": - definitions_repos = v.definitions.repos if v.definitions else {} mapping_repos = {"mappings": v.mappings.repositories} if v.mappings else {} - repos = {**definitions_repos, **mapping_repos} + repos = {**v.definitions.repos, **mapping_repos} for use, repositories in repos.items(): if repositories - v.repositories.keys(): raise ValueError((f"Unknown repository {repositories} in '{use}'.")) diff --git a/nomenclature/definition.py b/nomenclature/definition.py index e1eda28e..e6134a19 100644 --- a/nomenclature/definition.py +++ b/nomenclature/definition.py @@ -44,9 +44,11 @@ def __init__(self, path, dimensions=None): if (file := path.parent / "nomenclature.yaml").exists(): self.config = NomenclatureConfig.from_file(file=file) else: - self.config = None + self.config = NomenclatureConfig() - if not path.is_dir() and (self.config is None or not self.config.repositories): + if not path.is_dir() and not ( + self.config.repositories or self.config.definitions.region.country + ): raise NotADirectoryError(f"Definitions directory not found: {path}") self.dimensions = dimensions or ["region", "variable"] diff --git a/nomenclature/processor/region.py b/nomenclature/processor/region.py index 99764cde..eee5ec6d 100644 --- a/nomenclature/processor/region.py +++ b/nomenclature/processor/region.py @@ -483,15 +483,14 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition): mapping_files = [f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}] - if dsd.config and dsd.config.mappings: - for repository in dsd.config.mappings.repositories: - mapping_files.extend( - f - for f in ( - dsd.config.repositories[repository].local_path / "mappings" - ).glob("**/*") - if f.suffix in {".yaml", ".yml"} - ) + for repository in dsd.config.mappings.repositories: + mapping_files.extend( + f + for f in ( + dsd.config.repositories[repository].local_path / "mappings" + ).glob("**/*") + if f.suffix in {".yaml", ".yml"} + ) for file in mapping_files: try: diff --git a/tests/data/general-config-only-country/nomenclature.yaml b/tests/data/general-config-only-country/nomenclature.yaml new file mode 100644 index 00000000..5217775c --- /dev/null +++ b/tests/data/general-config-only-country/nomenclature.yaml @@ -0,0 +1,3 @@ +definitions: + region: + country: true diff --git a/tests/test_definition.py b/tests/test_definition.py index 2de7187e..87dacba5 100644 --- a/tests/test_definition.py +++ b/tests/test_definition.py @@ -64,6 +64,14 @@ def test_definition_from_general_config(workflow_folder): clean_up_external_repos(obs.config.repositories) +def test_definition_general_config_country_only(): + obs = DataStructureDefinition( + TEST_DATA_DIR / "general-config-only-country" / "definitions", + dimensions=["region"], + ) + assert all(region in obs.region for region in ("Austria", "Bolivia", "Kosovo")) + + def test_to_excel(simple_definition, tmpdir): """Check writing a DataStructureDefinition to file""" file = tmpdir / "testing_export.xlsx"