Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect duplicate code errors #314

Merged
17 changes: 12 additions & 5 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import nomenclature
from nomenclature.code import Code, MetaCode, RegionCode, VariableCode
from nomenclature.config import NomenclatureConfig
from nomenclature.error import custom_pydantic_errors
from nomenclature.error import custom_pydantic_errors, ErrorCollector
from pyam.utils import is_list_like

here = Path(__file__).parent.absolute()
Expand Down Expand Up @@ -213,12 +213,16 @@ def from_directory(
)
+ code_list
)

errors = ErrorCollector()
mapping: Dict[str, Code] = {}
for code in code_list:
if code.name in mapping:
raise ValueError(f"Duplicate item in {name} codelist: {code.name}")
errors.append(
ValueError(f"Duplicate item in {name} codelist: {code.name}")
)
mapping[code.name] = code
if errors:
raise ValueError(errors)
return cls(name=name, mapping=mapping)

@classmethod
Expand Down Expand Up @@ -636,11 +640,14 @@ def from_directory(

# translate to mapping
mapping: Dict[str, RegionCode] = {}

errors = ErrorCollector()
for code in code_list:
if code.name in mapping:
raise ValueError(f"Trying to set a duplicate code {code.name}")
errors.append(ValueError(f"Trying to set a duplicate code {code.name}"))
mapping[code.name] = code

if errors:
raise ValueError(errors)
return cls(name=name, mapping=mapping)

@property
Expand Down
9 changes: 7 additions & 2 deletions nomenclature/error.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import textwrap
from collections import namedtuple

pydantic_custom_error_config = {
Expand Down Expand Up @@ -55,8 +56,12 @@ def append(self, error: Exception) -> None:

def __repr__(self) -> str:
error = "error" if len(self.errors) == 1 else "errors"
return f"Collected {len(self.errors)} {error}:\n" + "\n\t".join(
str(error) for error in self.errors
error_list_str = "\n".join(
f"{i+1}. {error}" for i, error in enumerate(self.errors)
)

return f"Collected {len(self.errors)} {error}:\n" + textwrap.indent(
error_list_str, prefix=" "
)

def __bool__(self) -> bool:
Expand Down
1 change: 0 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import shutil
import sys
import os
import stat
from pathlib import Path
Expand Down
12 changes: 12 additions & 0 deletions tests/data/duplicate-code-list/variable/duplicate_entries.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
- Some Variable:
description:
unit:
- Some Variable:
description:
unit:
- Some other Variable:
description:
unit:
- Some other Variable:
description:
unit:
31 changes: 25 additions & 6 deletions tests/test_codelist.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pytest import raises
import pandas as pd
import pandas.testing as pdt
import pytest
import logging

from nomenclature.code import Code, RegionCode, MetaCode
Expand Down Expand Up @@ -331,11 +332,29 @@ def test_multiple_external_repos():
TEST_DATA_DIR / "nomenclature_configs" / "multiple_repos_per_dimension.yaml"
)
try:
with raises(ValueError, match="Duplicate"):
variable_code_list = VariableCodeList.from_directory(
"variable",
TEST_DATA_DIR / "nomenclature_configs" / "variable",
nomenclature_config,
)
variable_code_list = VariableCodeList.from_directory(
"variable",
TEST_DATA_DIR / "nomenclature_configs" / "variable",
nomenclature_config,
)
assert nomenclature_config.repositories.keys() == {
"common-definitions",
"legacy-definitions",
}

assert all(
repo.local_path.is_dir()
for repo in nomenclature_config.repositories.values()
)
assert len(variable_code_list) > 2000
finally:
clean_up_external_repos(nomenclature_config.repositories)


@pytest.mark.parametrize("codelist_class", [VariableCodeList, CodeList])
def test_variable_codelist_with_duplicates_raises(codelist_class):
    """Check that all duplicated code names are reported in one error.

    The ``duplicate-code-list/variable`` fixture lists both "Some Variable"
    and "Some other Variable" twice. ``from_directory`` is therefore expected
    to raise a single ``ValueError`` whose message counts two errors and names
    both duplicates, in that order.

    The argument is called ``codelist_class`` (rather than ``CodeList``) so it
    does not shadow the imported ``CodeList`` class inside the test body.
    """
    # `match` is a regex search; `.` does not cross newlines, so each `.*`
    # only skips whatever precedes the code name on its own line.
    error_string = "2 errors:\n.*Some Variable\n.*Some other Variable"
    with raises(ValueError, match=error_string):
        codelist_class.from_directory(
            "variable", TEST_DATA_DIR / "duplicate-code-list" / "variable"
        )
Loading