Skip to content

Commit

Permalink
Use multiple external repos for definitions and mappings (#311)
Browse files Browse the repository at this point in the history
* Add output_directory option to parse_model_registration

* Update config to use multiple repositories

* Update codelist to use multiple external repos

* Update RegionProcessor to use multiple external repos

* Remove unnecessary repository_dimension_path

* Update test for multiple external repos

* Fix repository set for definitions and mappings

* Add tests for config

* Add test for multiple external repos

* Revert to previous suggestion from @danielhuppmann

* Apply suggestions from code review

Co-authored-by: Daniel Huppmann <dh@dergelbesalon.at>

* Finish renaming in repo in loop

* Switch cast to set order to clarity

* Allow errors for rmtree for windows debugging

* Attempt fix for failing Windows test

* Remove all clean-up for Windows fix

* Change repo permissions on Windows to delete

* Re-introduce external repo cleanup

* Add explicit errors for debugging

* Try IWRITE for Windows permission change

* Try onerror for rmtree

* Get explicit removing errors

* Update docs for usage of multiple repos

---------

Co-authored-by: Daniel Huppmann <dh@dergelbesalon.at>
  • Loading branch information
phackstock and danielhuppmann authored Jan 23, 2024
1 parent a9c47dc commit 87311de
Show file tree
Hide file tree
Showing 13 changed files with 158 additions and 58 deletions.
14 changes: 10 additions & 4 deletions docs/user_guide/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ General project configuration
The following features can be accessed via a general project configuration file:

* Import codelists and mappings from public GitHub repositories
* Add all countries to the region codelist (details: :ref:`countries`)
* Add all countries to the region codelist (details: :ref:`countries`)

Configuration file format
-------------------------
Expand All @@ -24,19 +24,23 @@ Importing from an external repository
In order to import from an external repository, the configuration file must define the
repository and the repositories key.

The repository has a **name** (in the example below *common-definitions*) and a **url**:
The repository has a **name** (in the example below *common-definitions*) and a **url**. Multiple repositories can be used in a single configuration file:

.. code:: yaml
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
legacy-definitions:
url: https://github.com/IAMconsortium/legacy-definitions.git/
In order for the import to work the url must be given in the above format, i.e. with the
leading *https://* and the trailing *.git/*.

Information from an external repository can either be used for codelists ("definitions")
or model mappings, or both.
or model mappings, or both. For each definition dimension, i.e. *region* or *variable*
multiple external repositories can be used as the example below illustrates for
*variable*:

.. code:: yaml
Expand All @@ -47,7 +51,9 @@ or model mappings, or both.
region:
repository: common-definitions
variable:
repository: common-definitions
repositories:
- common-definitions
- legacy-definitions
mappings:
repository: common-definitions
Expand Down
29 changes: 14 additions & 15 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,17 +202,17 @@ def from_directory(
with suppress(AttributeError):
dimension = path.name
codelistconfig = getattr(config.definitions, dimension)
repo_path = (
config.repositories[codelistconfig.repository].local_path
/ codelistconfig.repository_dimension_path
)
code_list = (
cls._parse_codelist_dir(
repo_path,
file_glob_pattern,
for repo in codelistconfig.repositories:
repo_path = (
config.repositories[repo].local_path / "definitions" / dimension
)
code_list = (
cls._parse_codelist_dir(
repo_path,
file_glob_pattern,
)
+ code_list
)
+ code_list
)

mapping: Dict[str, Code] = {}
for code in code_list:
Expand Down Expand Up @@ -615,17 +615,16 @@ def from_directory(
code_list.append(RegionCode(name=c.name, hierarchy="Country"))

# importing from an external repository
if config.definitions.region.repository:
for repo in config.definitions.region.repositories:
repo_path = (
config.repositories[config.definitions.region.repository].local_path
/ config.definitions.region.repository_dimension_path
config.repositories[repo].local_path / "definitions" / "region"
)

code_list = cls._parse_region_code_dir(
code_list,
repo_path,
file_glob_pattern,
repository=config.definitions.region.repository,
repository=config.definitions.region.repositories,
)
code_list = cls._parse_and_replace_tags(
code_list, repo_path, file_glob_pattern
Expand All @@ -639,7 +638,7 @@ def from_directory(
mapping: Dict[str, RegionCode] = {}
for code in code_list:
if code.name in mapping:
raise DuplicateCodeError(name=name, code=code.name)
raise ValueError(f"Trying to set a duplicate code {code.name}")
mapping[code.name] = code

return cls(name=name, mapping=mapping)
Expand Down
60 changes: 41 additions & 19 deletions nomenclature/config.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,41 @@
from pathlib import Path
from typing import Dict, Optional
from typing import Annotated, Optional

import yaml
from git import Repo
from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator
from pydantic import (
BaseModel,
Field,
ValidationInfo,
field_validator,
model_validator,
ConfigDict,
BeforeValidator,
)


def convert_to_set(v: str | list[str] | set[str]) -> set[str]:
match v:
case set(v):
return v
case list(v):
return set(v)
case str(v):
return {v}
case _:
raise TypeError("`repositories` must be of type str, list or set.")


class CodeListConfig(BaseModel):
dimension: str
repository: str | None = None
repository_dimension_path: Path | None = None
repositories: Annotated[set[str] | None, BeforeValidator(convert_to_set)] = Field(
None, alias="repository"
)
model_config = ConfigDict(populate_by_name=True)

@model_validator(mode="after")
@classmethod
def set_repository_dimension_path(cls, v: "CodeListConfig") -> "CodeListConfig":
if v.repository is not None and v.repository_dimension_path is None:
v.repository_dimension_path = f"definitions/{v.dimension}"
return v
@property
def repository_dimension_path(self) -> str:
return f"definitions/{self.dimension}"


class RegionCodeListConfig(CodeListConfig):
Expand Down Expand Up @@ -84,20 +103,23 @@ def add_dimension(cls, v, info: ValidationInfo):
return {"dimension": info.field_name, **v}

@property
def repos(self) -> Dict[str, str]:
def repos(self) -> dict[str, str]:
return {
dimension: getattr(self, dimension).repository
dimension: getattr(self, dimension).repositories
for dimension in ("region", "variable")
if getattr(self, dimension) and getattr(self, dimension).repository
if getattr(self, dimension) and getattr(self, dimension).repositories
}


class RegionMappingConfig(BaseModel):
repository: str
repositories: Annotated[set[str], BeforeValidator(convert_to_set)] = Field(
..., alias="repository"
)
model_config = ConfigDict(populate_by_name=True)


class NomenclatureConfig(BaseModel):
repositories: Dict[str, Repository] = {}
repositories: dict[str, Repository] = {}
definitions: Optional[DataStructureConfig] = None
mappings: Optional[RegionMappingConfig] = None

Expand All @@ -107,11 +129,11 @@ def check_definitions_repository(
cls, v: "NomenclatureConfig"
) -> "NomenclatureConfig":
definitions_repos = v.definitions.repos if v.definitions else {}
mapping_repos = {"mappings": v.mappings.repository} if v.mappings else {}
mapping_repos = {"mappings": v.mappings.repositories} if v.mappings else {}
repos = {**definitions_repos, **mapping_repos}
for use, repository in repos.items():
if repository not in v.repositories:
raise ValueError((f"Unknown repository '{repository}' in {use}."))
for use, repositories in repos.items():
if repositories - v.repositories.keys():
raise ValueError((f"Unknown repository {repositories} in '{use}'."))
return v

def fetch_repos(self, target_folder: Path):
Expand Down
16 changes: 8 additions & 8 deletions nomenclature/processor/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,14 +482,14 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition):
mapping_files = [f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}]

if dsd.config and dsd.config.mappings:
mapping_files = [
f
for f in (
dsd.config.repositories[dsd.config.mappings.repository].local_path
/ "mappings"
).glob("**/*")
if f.suffix in {".yaml", ".yml"}
] + mapping_files
for repository in dsd.config.mappings.repositories:
mapping_files.extend(
f
for f in (
dsd.config.repositories[repository].local_path / "mappings"
).glob("**/*")
if f.suffix in {".yaml", ".yml"}
)

for file in mapping_files:
try:
Expand Down
17 changes: 13 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from pathlib import Path
import pytest
import shutil
import sys
import os
import stat
from pathlib import Path

import pandas as pd
import pytest
from pyam import IamDataFrame
from pyam.utils import IAMC_IDX
from nomenclature import DataStructureDefinition

from nomenclature import DataStructureDefinition

here = Path(__file__).parent
TEST_DATA_DIR = here / "data"
Expand Down Expand Up @@ -48,8 +52,13 @@ def add_meta(df):
df.set_meta(["foo", "bar"], "string")


def remove_readonly(func, path, excinfo):
os.chmod(path, stat.S_IWRITE)
func(path)


def clean_up_external_repos(repos):
# clean up the external repo
for repository in repos.values():
if repository.local_path.exists():
shutil.rmtree(repository.local_path, ignore_errors=True)
shutil.rmtree(repository.local_path, onerror=remove_readonly)
1 change: 0 additions & 1 deletion tests/data/general-config-only/nomenclature.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ definitions:
country: true
variable:
repository: common-definitions
repository_dimension_path: definitions/variable
1 change: 0 additions & 1 deletion tests/data/general-config/nomenclature.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ definitions:
country: true
variable:
repository: common-definitions
repository_dimension_path: definitions/variable
1 change: 0 additions & 1 deletion tests/data/nomenclature_configs/hash_and_release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ definitions:
country: true
variable:
repository: common-definitions
repository_dimension_path: definitions/variable
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
legacy-definitions:
url: https://github.com/IAMconsortium/legacy-definitions.git/
mappings:
repositories:
- common-definitions
- legacy-definitions
10 changes: 10 additions & 0 deletions tests/data/nomenclature_configs/multiple_repos_per_dimension.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
legacy-definitions:
url: https://github.com/IAMconsortium/legacy-definitions.git/
definitions:
variable:
repository:
- common-definitions
- legacy-definitions
Empty file.
20 changes: 18 additions & 2 deletions tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
import pandas.testing as pdt
import logging

from nomenclature.code import Code, RegionCode, MetaCode, VariableCode
from nomenclature.code import Code, RegionCode, MetaCode
from nomenclature.codelist import (
CodeList,
VariableCodeList,
RegionCodeList,
MetaCodeList,
)
from nomenclature.config import NomenclatureConfig

from conftest import TEST_DATA_DIR
from conftest import TEST_DATA_DIR, clean_up_external_repos


def test_simple_codelist():
Expand Down Expand Up @@ -323,3 +324,18 @@ def test_MetaCodeList_from_directory():
}
exp = MetaCodeList(name="Meta", mapping=mapping)
assert obs == exp


def test_multiple_external_repos():
nomenclature_config = NomenclatureConfig.from_file(
TEST_DATA_DIR / "nomenclature_configs" / "multiple_repos_per_dimension.yaml"
)
try:
with raises(ValueError, match="Duplicate"):
variable_code_list = VariableCodeList.from_directory(
"variable",
TEST_DATA_DIR / "nomenclature_configs" / "variable",
nomenclature_config,
)
finally:
clean_up_external_repos(nomenclature_config.repositories)
38 changes: 35 additions & 3 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path
from pytest import raises

from nomenclature.config import Repository, NomenclatureConfig
from nomenclature.config import Repository, NomenclatureConfig, CodeListConfig

from conftest import TEST_DATA_DIR
from conftest import TEST_DATA_DIR, clean_up_external_repos


def test_hash_and_release_raises():
Expand All @@ -19,7 +19,39 @@ def test_setting_local_path_raises():


def test_unknown_repo_raises():
with raises(ValueError, match="Unknown repository 'common-definitions'"):
with raises(
ValueError, match="Unknown repository {'common-definitions'} in 'region'"
):
NomenclatureConfig.from_file(
TEST_DATA_DIR / "nomenclature_configs" / "unknown_repo.yaml"
)


def test_multiple_definition_repos():
nomenclature_config = NomenclatureConfig.from_file(
TEST_DATA_DIR / "nomenclature_configs" / "multiple_repos_per_dimension.yaml"
)
try:
exp_repos = {"common-definitions", "legacy-definitions"}
assert nomenclature_config.repositories.keys() == exp_repos
assert nomenclature_config.definitions.variable.repositories == exp_repos
finally:
clean_up_external_repos(nomenclature_config.repositories)


def test_codelist_config_set_input():
exp_repos = {"repo1", "repo2"}
code_list_config = CodeListConfig(dimension="variable", repositories=exp_repos)
assert code_list_config.repositories == exp_repos


def test_multiple_mapping_repos():
nomenclature_config = NomenclatureConfig.from_file(
TEST_DATA_DIR / "nomenclature_configs" / "multiple_repos_for_mapping.yaml"
)
try:
exp_repos = {"common-definitions", "legacy-definitions"}
assert nomenclature_config.mappings.repositories == exp_repos
assert nomenclature_config.repositories.keys() == exp_repos
finally:
clean_up_external_repos(nomenclature_config.repositories)

0 comments on commit 87311de

Please sign in to comment.