def _read_definition_column(path: Path, sheet_name: str) -> set:
    """Return the unique, non-empty values of column `sheet_name` in sheet `sheet_name`."""
    column = pd.read_excel(path, sheet_name=sheet_name)[sheet_name]
    # Blank cells are read as NaN; they are not definitions, so drop them
    # instead of reporting them as differences.
    return set(column.dropna())


@cli.command("diff-definitions")
@click.argument("source", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(exists=True, path_type=Path))
@click.option("--sheet_name", default="variable")
@click.option("--output", type=click.Path(path_type=Path), default="diff.xlsx")
def cli_diff_definitions_to_excel(
    source: Path,
    target: Path,
    sheet_name: str,
    output: Optional[Path],
):
    """Report the difference between two excel sheets generated by `export-definitions`.

    Values in `source` but not in `target` are placed in the column named "source".
    Values in `target` but not `source` are placed in a column named "target".
    Within each column the values are sorted by their string representation, so
    repeated runs on the same inputs produce the same report.

    Parameters
    ----------
    source : Path
        Path and file name for the source file
    target : Path
        Path and file name for the target file
    sheet_name : str
        Name of the sheet to compare in both files; the column with the same
        name inside that sheet is the one that is compared, by default
        "variable"
    output : Optional[Path]
        Excel file the diff is written to, by default "diff.xlsx". If None,
        the diff is printed to the terminal instead of being written to a file.
    """
    source_values = _read_definition_column(source, sheet_name)
    target_values = _read_definition_column(target, sheet_name)

    # The two columns usually differ in length; concatenating along the columns
    # aligns them on the positional index and pads the shorter one with NaN.
    # `dtype=object` keeps an empty column from being inferred as float.
    diff = pd.concat(
        [
            pd.Series(
                sorted(source_values - target_values, key=str),
                name="source",
                dtype=object,
            ),
            pd.Series(
                sorted(target_values - source_values, key=str),
                name="target",
                dtype=object,
            ),
        ],
        axis="columns",
    )

    # A `Path` is always truthy, so test for None explicitly.
    if output is not None:
        diff.to_excel(output, sheet_name=sheet_name, index=False)
    else:
        # Show the computed diff itself (the original printed `output` here).
        click.echo(diff.to_string(index=False))