Skip to content

Commit

Permalink
add cli tool to look at differences between two output definition files
Browse files Browse the repository at this point in the history
  • Loading branch information
gidden committed Nov 14, 2024
1 parent e180be5 commit 2f1a2bc
Showing 1 changed file with 49 additions and 4 deletions.
53 changes: 49 additions & 4 deletions nomenclature/cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from pathlib import Path
from typing import List, Optional
import importlib.util
import sys
from pathlib import Path
from typing import List, Optional

import click

import pandas as pd
from pyam import IamDataFrame
from nomenclature.definition import DataStructureDefinition

from nomenclature.codelist import VariableCodeList
from nomenclature.definition import DataStructureDefinition
from nomenclature.processor import RegionProcessor
from nomenclature.testing import assert_valid_structure, assert_valid_yaml

Expand Down Expand Up @@ -189,6 +190,50 @@ def cli_export_definitions_to_excel(
DataStructureDefinition(path / "definitions").to_excel(target)


@cli.command("diff-definitions")
@click.argument("source", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(exists=True, path_type=Path))
@click.option("--sheet_name", default="variable")
@click.option("--output", type=click.Path(path_type=Path), default="diff.xlsx")
def cli_diff_definitions_to_excel(
    source: Path,
    target: Path,
    sheet_name: Optional[str],
    output: Optional[Path],
):
    """Report the difference between two excel sheets generated by `export-definitions`.

    Values in `source` but not in `target` are placed in the column named "source".
    Values in `target` but not in `source` are placed in the column named "target".
    Within each column the values are ordered by their string representation, so
    repeated runs on the same inputs produce the same report.

    Parameters
    ----------
    source : Path
        Path and file name for the source file
    target : Path
        Path and file name for the target file
    sheet_name : Optional[str]
        Name of the sheet to read from both files; the column with the same name
        in that sheet is the one being compared, by default "variable"
    output : Optional[Path]
        Excel file the diff is written to, by default "diff.xlsx". If no output
        is given (e.g. when calling the function directly with None), the diff
        is printed instead.
    """
    # The compared column carries the same name as the sheet it lives in.
    source_values = set(pd.read_excel(source, sheet_name=sheet_name)[sheet_name])
    target_values = set(pd.read_excel(target, sheet_name=sheet_name)[sheet_name])

    # Sets have no stable order; sort by str() so that mixed cell types
    # (e.g. text next to an empty cell read as NaN) remain comparable.
    only_in_source = sorted(source_values - target_values, key=str)
    only_in_target = sorted(target_values - source_values, key=str)

    # Fixed string headers (as documented) instead of the Path objects: they are
    # always writable as cell values and stay distinct even if both paths match.
    diff = pd.concat(
        [
            pd.Series(only_in_source, name="source"),
            pd.Series(only_in_target, name="target"),
        ],
        axis="columns",
    )

    if output:
        diff.to_excel(output, sheet_name=sheet_name, index=False)
    else:
        # Show the computed diff, not the (empty) output argument.
        click.echo(diff.to_string(index=False))


@cli.command("list-missing-variables")
@click.argument("data", type=click.Path(exists=True, path_type=Path))
@click.option(
Expand Down

0 comments on commit 2f1a2bc

Please sign in to comment.