Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the to_excel() method and add to CLI #331

Merged
merged 17 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions nomenclature/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,42 @@ def check_region_aggregation(
results_df.to_excel(processed_data)
if differences:
differences_df.reset_index().to_excel(differences, index=False)


@cli.command("export-project")
@click.argument("path", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(path_type=Path))
@click.option(
    "--definitions",
    help="Optional name for definitions folder",
    type=str,
    default="definitions",
)
@click.option(
    "--dimension",
    "dimensions",
    help="Optional list of dimensions",
    type=str,
    multiple=True,
    default=None,
)
def cli_export_project_to_excel(
    path: Path,
    target: Path,
    definitions: str,
    dimensions: Optional[List[str]],
):
    """Export the codelists of a project to an xlsx spreadsheet

    Parameters
    ----------
    path : Path
        Project directory to be exported
    target : Path
        Path and file name for the exported file
    definitions : str, optional
        Name of the definitions folder, defaults to "definitions"
    dimensions : List[str], optional
        Dimensions to be exported, defaults to all sub-folders of `definitions`
    """
    # The definitions folder is resolved relative to the project directory;
    # the resulting DataStructureDefinition writes the workbook itself.
    DataStructureDefinition(path / definitions, dimensions=dimensions).to_excel(target)
3 changes: 0 additions & 3 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,6 @@ def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
)
if sort_by_code:
codelist.sort_values(by=self.name, inplace=True)
codelist.rename(
columns={c: str(c).capitalize() for c in codelist.columns}, inplace=True
)
return codelist

def to_csv(self, path=None, sort_by_code: bool = False, **kwargs):
Expand Down
73 changes: 64 additions & 9 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
from datetime import datetime
from pathlib import Path

import pandas as pd
import git
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level
from pyam.utils import write_sheet

from nomenclature.codelist import (
CodeList,
Expand Down Expand Up @@ -41,11 +44,18 @@ def __init__(self, path, dimensions=None):
if not isinstance(path, Path):
path = Path(path)

if (file := path.parent / "nomenclature.yaml").exists():
self.working_dir = path.parent

if (file := self.working_dir / "nomenclature.yaml").exists():
self.config = NomenclatureConfig.from_file(file=file)
else:
self.config = NomenclatureConfig()

try:
self.repo = git.Repo(self.working_dir)
except git.InvalidGitRepositoryError:
self.repo = None

if not path.is_dir() and not (
self.config.repositories or self.config.definitions.region.country
):
Expand Down Expand Up @@ -136,22 +146,67 @@ def check_aggregate(self, df: IamDataFrame, **kwargs) -> None:
error = pd.concat(lst)
return error if not error.empty else None

def to_excel(self, excel_writer, sort_by_code: bool = False, **kwargs):
    """Write the codelists to an xlsx spreadsheet

    The workbook starts with a "project" sheet listing attributes of this
    definition (project name, creation time, git information where
    available), followed by one sheet per dimension.

    Parameters
    ----------
    excel_writer : path-like or file-like
        Target of the export; it is always wrapped in a new
        :class:`pandas.ExcelWriter` using the "xlsxwriter" engine.
    sort_by_code : bool, optional
        Sort each codelist before exporting to file.
    **kwargs
        Passed to :class:`pandas.ExcelWriter`.
    """
    with pd.ExcelWriter(excel_writer, engine="xlsxwriter", **kwargs) as writer:

        # attributes of the DataStructureDefinition itself
        arg_dict = {
            "project": self.working_dir.absolute().parts[-1],
            "file created": time_format(datetime.now()),
            "": "",
        }
        if self.repo is not None:
            arg_dict.update(git_attributes("", self.repo))

        # collect one frame per repository and concatenate only once
        frames = [make_dataframe(arg_dict)]
        for key, value in self.config.repositories.items():
            # open external repositories in a context manager so that their
            # handles are released as soon as the attributes have been read
            with git.Repo(value.local_path) as repo:
                frames.append(make_dataframe(git_attributes(key, repo)))

        write_sheet(writer, "project", pd.concat(frames))

        # write the codelist of each dimension to its own sheet
        for dim in self.dimensions:
            getattr(self, dim).to_excel(writer, dim, sort_by_code)


def time_format(x):
    """Return `x` formatted as "YYYY-MM-DD HH:MM:SS" via its `strftime` method."""
    pattern = "%Y-%m-%d %H:%M:%S"
    return x.strftime(pattern)


def git_attributes(name, repo):
    """Collect identifying attributes of a git repository

    Parameters
    ----------
    name : str
        Label stored under the "repository" key.
    repo : git.Repo
        Repository to read the remote url and the current commit from.

    Returns
    -------
    dict
        Keys "repository", "url", "commit" and "timestamp"; "url" is *None*
        if the repository has no default remote.
    """
    # `Repo.remote()` raises a ValueError if the default remote does not
    # exist, e.g. for a repository that only lives on the local machine
    try:
        url = repo.remote().url
    except ValueError:
        url = None

    # resolve the current commit once and reuse it for both entries
    commit = repo.commit()
    return {
        "repository": name,
        "url": url,
        "commit": commit,
        "timestamp": time_format(commit.committed_datetime),
    }


def make_dataframe(data):
    """Turn a flat mapping into a two-column frame ("attribute", "value")

    Each key of `data` becomes one row: the key goes to the "attribute"
    column and its value to the "value" column.
    """
    frame = pd.DataFrame.from_dict(data, orient="index", columns=["value"])
    # move the keys from the index into a regular column
    frame = frame.reset_index()
    return frame.rename(columns={"index": "attribute"})
Binary file modified tests/data/excel_io/validation_nc.xlsx
Binary file not shown.
Binary file modified tests/data/excel_io/validation_nc_list_arg.xlsx
Binary file not shown.
21 changes: 21 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def test_cli_installed():
"check-region-aggregation",
"validate-project",
"validate-yaml",
"export-project",
)
)

Expand Down Expand Up @@ -312,3 +313,23 @@ def test_check_region_aggregation(tmp_path):
)
)
assert_iamframe_equal(IamDataFrame(tmp_path / "results.xlsx"), exp_result)


def test_cli_export_to_excel(simple_definition, tmpdir):
    """Assert that exporting a project to excel via the CLI works as expected"""
    file = tmpdir / "testing_export.xlsx"

    result = runner.invoke(
        cli,
        [
            "export-project",
            str(TEST_DATA_DIR / "general-config"),
            str(file),
        ],
    )
    assert result.exit_code == 0

    # use a context manager so that the workbook is closed at the end of the test
    with pd.ExcelFile(file) as obs:
        assert obs.sheet_names == ["project", "region", "variable"]
10 changes: 5 additions & 5 deletions tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_to_excel(tmpdir):

(
VariableCodeList.from_directory(
"Variable", TEST_DATA_DIR / "validation_nc" / "variable"
"variable", TEST_DATA_DIR / "validation_nc" / "variable"
).to_excel(file)
)

Expand All @@ -148,11 +148,11 @@ def test_to_excel(tmpdir):
def test_to_csv():
"""Check writing to csv"""
obs = VariableCodeList.from_directory(
"Variable", TEST_DATA_DIR / "simple_codelist"
"variable", TEST_DATA_DIR / "simple_codelist"
).to_csv(lineterminator="\n")

exp = (
"Variable,Description,Unit,Skip-region-aggregation,Bool\n"
"variable,description,unit,skip-region-aggregation,bool\n"
"Some Variable,Some basic variable,,False,True\n"
)
assert obs == exp
Expand Down Expand Up @@ -207,8 +207,8 @@ def test_to_excel_read_excel_roundtrip(tmpdir):
"variable",
tmpdir / "output.xlsx",
"variable",
"Variable",
attrs=["Description", "Unit", "Region-aggregation"],
"variable",
attrs=["description", "unit", "region-aggregation"],
)

assert obs == exp
Expand Down
23 changes: 19 additions & 4 deletions tests/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,33 @@ def test_to_excel(simple_definition, tmpdir):

simple_definition.to_excel(file)

obs = pd.read_excel(file)
obs = pd.read_excel(file, sheet_name="variable")
exp = pd.read_excel(TEST_DATA_DIR / "excel_io" / "validation_nc.xlsx")
pd.testing.assert_frame_equal(obs, exp)


def test_to_excel_with_external_repo(tmpdir):
"""Check writing a DataStructureDefinition with an external repo to file"""
file = tmpdir / "testing_export.xlsx"

dsd = DataStructureDefinition(TEST_DATA_DIR / "general-config" / "definitions")
dsd.to_excel(file)

obs = pd.ExcelFile(file)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although not as critical as in the pyam issue IAMconsortium/pyam#817, I'd also use a context manager here to make sure the file is properly closed at the end of the test:

Suggested change
obs = pd.ExcelFile(file)
with pd.ExcelFile(file) as obs:

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented, thanks for the suggestion

assert obs.sheet_names == ["project", "region", "variable"]

obs_project = obs.parse("project")
exp = pd.DataFrame([["project", "general-config"]], columns=["attribute", "value"])
pd.testing.assert_frame_equal(exp, obs_project[0:1])


@pytest.mark.parametrize(
"input_file, attrs, exp_file",
[
("validation_nc.xlsx", ["Description", "Unit"], "validation_nc_flat.yaml"),
("validation_nc.xlsx", ["description", "unit"], "validation_nc_flat.yaml"),
(
"validation_nc_list_arg.xlsx",
["Description", "Unit", "Region-aggregation"],
["description", "unit", "region-aggregation"],
"validation_nc_list_arg.yaml",
),
],
Expand All @@ -102,7 +117,7 @@ def test_create_yaml_from_xlsx(input_file, attrs, exp_file, tmpdir):
source=TEST_DATA_DIR / "excel_io" / input_file,
target=file,
sheet_name="variable_definitions",
col="Variable",
col="variable",
attrs=attrs,
)

Expand Down
Loading