Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the to_excel() method and add to CLI #331

Merged
merged 17 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions nomenclature/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,22 @@ def check_region_aggregation(
results_df.to_excel(processed_data)
if differences:
differences_df.reset_index().to_excel(differences, index=False)


@cli.command("export-definition")
@click.argument("path", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(path_type=Path))
def cli_export_definition_to_excel(
    path: Path,
    target: Path,
):
    """Export the definitions of a project to an xlsx spreadsheet

    Parameters
    ----------
    path : Path
        Project directory to be exported
    target : Path
        Path and file name for the exported file
    """
    # the definitions are read from the "definitions" folder of the project directory
    DataStructureDefinition(path / "definitions").to_excel(target)
25 changes: 6 additions & 19 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,6 @@ def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
)
if sort_by_code:
codelist.sort_values(by=self.name, inplace=True)
codelist.rename(
columns={c: str(c).capitalize() for c in codelist.columns}, inplace=True
)
return codelist

def to_csv(self, path=None, sort_by_code: bool = False, **kwargs):
Expand Down Expand Up @@ -368,22 +365,12 @@ def to_excel(
**kwargs
Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
"""

# default sheet_name to the name of the codelist
if sheet_name is None:
sheet_name = self.name

# open a new ExcelWriter instance (if necessary)
close = False
if not isinstance(excel_writer, pd.ExcelWriter):
close = True
excel_writer = pd.ExcelWriter(excel_writer, **kwargs)

write_sheet(excel_writer, sheet_name, self.to_pandas(sort_by_code))

# close the file if `excel_writer` arg was a file name
if close:
excel_writer.close()
sheet_name = sheet_name or self.name
if isinstance(excel_writer, pd.ExcelWriter):
write_sheet(excel_writer, sheet_name, self.to_pandas(sort_by_code))
else:
with pd.ExcelWriter(excel_writer, **kwargs) as writer:
write_sheet(writer, sheet_name, self.to_pandas(sort_by_code))

def codelist_repr(self, json_serialized=False) -> Dict:
"""Cast a CodeList into corresponding dictionary"""
Expand Down
87 changes: 72 additions & 15 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
from datetime import datetime
from pathlib import Path

import pandas as pd
import git
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level
from pyam.utils import write_sheet

from nomenclature.codelist import (
CodeList,
Expand Down Expand Up @@ -41,11 +44,18 @@ def __init__(self, path, dimensions=None):
if not isinstance(path, Path):
path = Path(path)

if (file := path.parent / "nomenclature.yaml").exists():
self.project_folder = path.parent

if (file := self.project_folder / "nomenclature.yaml").exists():
self.config = NomenclatureConfig.from_file(file=file)
else:
self.config = NomenclatureConfig()

try:
self.repo = git.Repo(self.project_folder)
except git.InvalidGitRepositoryError:
self.repo = None

if not path.is_dir() and not (
self.config.repositories or self.config.definitions.region.country
):
Expand Down Expand Up @@ -136,22 +146,69 @@ def check_aggregate(self, df: IamDataFrame, **kwargs) -> None:
error = pd.concat(lst)
return error if not error.empty else None

def to_excel(
self, excel_writer, sheet_name=None, sort_by_code: bool = False, **kwargs
):
"""Write the *variable* codelist to an Excel sheet
def to_excel(self, excel_writer, **kwargs):
"""Write the codelists to an xlsx spreadsheet

Parameters
----------
excel_writer : path-like, file-like, or ExcelWriter object
File path as string or :class:`pathlib.Path`,
or existing :class:`pandas.ExcelWriter`.
sheet_name : str, optional
Name of sheet that will have the codelist. If *None*, use the codelist name.
sort_by_code : bool, optional
Sort the codelist before exporting to file.
excel_writer : str or :class:`pathlib.Path`
File path as string or :class:`pathlib.Path`.
**kwargs
Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
Passed to :class:`pandas.ExcelWriter`
"""
# TODO write all dimensions to the file
self.variable.to_excel(excel_writer, sheet_name, sort_by_code, **kwargs)
if "engine" not in kwargs:
kwargs["engine"] = "xlsxwriter"

with pd.ExcelWriter(excel_writer, **kwargs) as writer:

# create dataframe with attributes of the DataStructureDefinition
project = self.project_folder.absolute().parts[-1]
arg_dict = {
"project": project,
"file_created": time_format(datetime.now()),
"": "",
}
if self.repo is not None:
arg_dict.update(git_attributes(project, self.repo))

ret = make_dataframe(arg_dict)

for key, value in self.config.repositories.items():
ret = pd.concat(
[
ret,
make_dataframe(git_attributes(key, git.Repo(value.local_path))),
]
)

write_sheet(writer, "project", ret)

# write codelist for each dimensions to own sheet
for dim in self.dimensions:
getattr(self, dim).to_excel(writer, dim, sort_by_code=True)


def time_format(x):
    """Return *x* formatted as a ``YYYY-MM-DD HH:MM:SS`` string

    Parameters
    ----------
    x : object with a ``strftime`` method
        The point in time to be formatted, e.g. a :class:`datetime.datetime`
    """
    pattern = "%Y-%m-%d %H:%M:%S"
    return x.strftime(pattern)


def git_attributes(name, repo):
    """Collect url, commit hash and commit timestamp of a clean git repository

    Parameters
    ----------
    name : str
        Prefix used for the keys of the returned dictionary
    repo : :class:`git.Repo`
        Repository to be described

    Returns
    -------
    dict
        Entries ``<name>.url``, ``<name>.commit_hash`` and
        ``<name>.commit_timestamp``

    Raises
    ------
    ValueError
        If the repository is dirty
    """
    if repo.is_dirty():
        raise ValueError(f"Repository '{name}' is dirty")
    url = repo.remote().url
    # look up the commit only once so that hash and timestamp always belong together
    commit = repo.commit()
    return {
        f"{name}.url": url,
        # store the hash as a plain string instead of the `git.Commit` object, so
        # the value does not rely on implicit stringification when written to file
        f"{name}.commit_hash": commit.hexsha,
        f"{name}.commit_timestamp": time_format(commit.committed_datetime),
    }


def make_dataframe(data):
    """Turn a dictionary into a table with the columns "attribute" and "value"

    Parameters
    ----------
    data : dict
        Each key becomes one row: the key is written to the column "attribute",
        the associated item to the column "value".

    Returns
    -------
    :class:`pandas.DataFrame`
    """
    # the keys end up in the index first ...
    frame = pd.DataFrame.from_dict(data, orient="index", columns=["value"])
    # ... and are then moved into a regular column named "attribute"
    frame = frame.reset_index()
    return frame.rename(columns={"index": "attribute"})
Binary file modified tests/data/excel_io/validation_nc.xlsx
Binary file not shown.
Binary file modified tests/data/excel_io/validation_nc_list_arg.xlsx
Binary file not shown.
21 changes: 21 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def test_cli_installed():
command in result.stdout
for command in (
"check-region-aggregation",
"export-definition",
"validate-project",
"validate-yaml",
)
Expand Down Expand Up @@ -312,3 +313,23 @@ def test_check_region_aggregation(tmp_path):
)
)
assert_iamframe_equal(IamDataFrame(tmp_path / "results.xlsx"), exp_result)


def test_cli_export_to_excel(tmpdir):
    """Check that `export-definition` exits cleanly and writes the expected sheets"""
    target = tmpdir / "testing_export.xlsx"
    args = [
        "export-definition",
        str(TEST_DATA_DIR / "general-config"),
        str(target),
    ]

    # the command must finish without an error ...
    result = runner.invoke(cli, args)
    assert result.exit_code == 0

    # ... and the exported workbook must hold these sheets in this order
    with pd.ExcelFile(target) as workbook:
        assert workbook.sheet_names == ["project", "region", "variable"]
10 changes: 5 additions & 5 deletions tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_to_excel(tmpdir):

(
VariableCodeList.from_directory(
"Variable", TEST_DATA_DIR / "validation_nc" / "variable"
"variable", TEST_DATA_DIR / "validation_nc" / "variable"
).to_excel(file)
)

Expand All @@ -148,11 +148,11 @@ def test_to_excel(tmpdir):
def test_to_csv():
"""Check writing to csv"""
obs = VariableCodeList.from_directory(
"Variable", TEST_DATA_DIR / "simple_codelist"
"variable", TEST_DATA_DIR / "simple_codelist"
).to_csv(lineterminator="\n")

exp = (
"Variable,Description,Unit,Skip-region-aggregation,Bool\n"
"variable,description,unit,skip-region-aggregation,bool\n"
"Some Variable,Some basic variable,,False,True\n"
)
assert obs == exp
Expand Down Expand Up @@ -207,8 +207,8 @@ def test_to_excel_read_excel_roundtrip(tmpdir):
"variable",
tmpdir / "output.xlsx",
"variable",
"Variable",
attrs=["Description", "Unit", "Region-aggregation"],
"variable",
attrs=["description", "unit", "region-aggregation"],
)

assert obs == exp
Expand Down
23 changes: 19 additions & 4 deletions tests/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,33 @@ def test_to_excel(simple_definition, tmpdir):

simple_definition.to_excel(file)

obs = pd.read_excel(file)
obs = pd.read_excel(file, sheet_name="variable")
exp = pd.read_excel(TEST_DATA_DIR / "excel_io" / "validation_nc.xlsx")
pd.testing.assert_frame_equal(obs, exp)


def test_to_excel_with_external_repo(tmpdir):
    """Check exporting a DataStructureDefinition from `general-config` to xlsx"""
    target = tmpdir / "testing_export.xlsx"
    definitions_dir = TEST_DATA_DIR / "general-config" / "definitions"

    DataStructureDefinition(definitions_dir).to_excel(target)

    # only the first row of the "project" sheet is compared
    expected_first_row = pd.DataFrame(
        [["project", "general-config"]], columns=["attribute", "value"]
    )

    with pd.ExcelFile(target) as workbook:
        assert workbook.sheet_names == ["project", "region", "variable"]

        project_sheet = workbook.parse("project")
        pd.testing.assert_frame_equal(expected_first_row, project_sheet[0:1])


@pytest.mark.parametrize(
"input_file, attrs, exp_file",
[
("validation_nc.xlsx", ["Description", "Unit"], "validation_nc_flat.yaml"),
("validation_nc.xlsx", ["description", "unit"], "validation_nc_flat.yaml"),
(
"validation_nc_list_arg.xlsx",
["Description", "Unit", "Region-aggregation"],
["description", "unit", "region-aggregation"],
"validation_nc_list_arg.yaml",
),
],
Expand All @@ -102,7 +117,7 @@ def test_create_yaml_from_xlsx(input_file, attrs, exp_file, tmpdir):
source=TEST_DATA_DIR / "excel_io" / input_file,
target=file,
sheet_name="variable_definitions",
col="Variable",
col="variable",
attrs=attrs,
)

Expand Down
Loading