Skip to content

Commit

Permalink
Add defaults and check_illegal_characters field
Browse files (browse the repository at this point in the history)
  • Loading branch information
dc-almeida committed Nov 13, 2024
1 parent 0816561 commit 61a71d9
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 12 deletions.
18 changes: 10 additions & 8 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,22 +335,24 @@ def read_excel(cls, name, source, sheet_name, col, attrs=None):

return cls(name=name, mapping=mapp)

def check_illegal_characters(
self, config: NomenclatureConfig = None
) -> Dict[str, Code]:
def check_illegal_characters(self, config: NomenclatureConfig) -> Dict[str, Code]:
"""Check that no illegal characters are left in codes after tag replacement"""
forbidden = ["{", "}"]
if config and config.illegal_characters:
forbidden += config.illegal_characters
illegal = (
["{", "}"] + config.illegal_characters
if config.check_illegal_characters
else ["{", "}"]
)

def _check_string(value):
if isinstance(value, str):
if any(char in value for char in forbidden):
if any(char in value for char in illegal):
raise ValueError(
f"Unexpected character in {self.name}: '{code.name}'."
" Check for illegal characters and/or if tags were spelled correctly."
)
elif isinstance(value, dict):
for k in value.keys():
_check_string(k)
for v in value.values():
_check_string(v)
elif isinstance(value, list):
Expand All @@ -360,7 +362,7 @@ def _check_string(value):
for code in self.mapping.values():
if not code.repository:
for attr in code.model_fields:
if attr not in ["file", "repository"]:
if attr != "file":
value = getattr(code, attr)
_check_string(value)

Expand Down
5 changes: 3 additions & 2 deletions nomenclature/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,14 @@ class NomenclatureConfig(BaseModel):
repositories: dict[str, Repository] = Field(default_factory=dict)
definitions: DataStructureConfig = Field(default_factory=DataStructureConfig)
mappings: RegionMappingConfig = Field(default_factory=RegionMappingConfig)
illegal_characters: None | list[str] = None
check_illegal_characters: bool = True
illegal_characters: list[str] = ["'", ":", ";"]

model_config = ConfigDict(use_enum_values=True)

@field_validator("illegal_characters", mode="before")
@classmethod
def check_illegal_characters(cls, v: str | list[str]) -> list[str]:
def check_illegal_chars(cls, v: str | list[str]) -> list[str]:
return v if isinstance(v, list) else [v]

@model_validator(mode="after")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ definitions:
variable:
repository:
- common-definitions
illegal_characters: ['"' , ";"] # these are known to be present in common-definitions variables
check_illegal_characters: true
illegal_characters: ['"' , ";"] # these are known to be present in common-definitions variables
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ dimensions:
definitions:
region:
country: true
illegal_characters: [";", ":"]
2 changes: 1 addition & 1 deletion tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_stray_tag_fails(subfolder, match):
code_list = VariableCodeList.from_directory(
"variable", MODULE_TEST_DATA_DIR / "stray_tag" / subfolder
)
code_list.check_illegal_characters()
code_list.check_illegal_characters(NomenclatureConfig(dimensions=["variable"]))


def test_illegal_char_fails():
Expand Down

0 comments on commit 61a71d9

Please sign in to comment.