diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index b7e0c94..85f4fda 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -21,6 +21,10 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install tox
+      - name: Create .env file
+        run: python create_env_file.py
+      - name: Start containers
+        run: docker-compose up -d
       - name: Run Tests and Record Coverage
         run: |
           tox -e coverage
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 6a3301d..95fbfee 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -29,6 +29,10 @@
 #        run: |
 #          python -m pip install --upgrade pip
 #          pip install tox
+#      - name: Create .env file
+#        run: python create_env_file.py
+#      - name: Start containers
+#        run: docker-compose up -d
 #      - name: Run tox
 #        run: |
 #          tox
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 3edd536..009f6f9 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -21,6 +21,10 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install tox
+      - name: Create .env file
+        run: python create_env_file.py
+      - name: Start containers
+        run: docker-compose up -d
       - name: Run the Unit Tests via Tox
         run: |
           tox -e tests
diff --git a/create_env_file.py b/create_env_file.py
new file mode 100644
index 0000000..36c81b3
--- /dev/null
+++ b/create_env_file.py
@@ -0,0 +1,45 @@
+"""
+This Python script copies env.example to .env if .env does not already exist.
+It is similar to the bash command `cp env.example .env`.
+It is used in all tox environments except the linting environment.
+"""
+
+from pathlib import Path
+from shutil import copyfile
+
+
+def create_env_file(directory_path: Path):
+    """
+    Checks if a file with the file name `destination_file_name` exists.
+    If yes, nothing will be done.
+    If not, it will copy the `source_file_name` file to the `destination_file_name` in the same directory.
+    """
+    source_file_name: str = "env.example"
+    destination_file_name: str = ".env"
+
+    path_to_env_file: Path = directory_path / destination_file_name
+
+    if path_to_env_file.exists():
+        print("Great, you already have an environment file.")
+    else:
+        print(
+            f"Uh, I see you have no {destination_file_name} file in {directory_path}.\n"
+            f"But do not worry, I have got you covered: I will copy the {source_file_name} file to "
+            f"{destination_file_name} for you."
+        )
+        try:
+            copyfile(directory_path / source_file_name, path_to_env_file)
+            print("And we are done.\nPlease update the credentials to your needs, e.g. the Kroki host.")
+        except FileNotFoundError:
+            print(
+                f"I am so sorry, but the {source_file_name} file is gone. Please ask one of your colleagues "
+                f"to help you."
+            )
+
+
+if __name__ == "__main__":
+    cwd_path = Path.cwd()
+    root_directory_path = cwd_path
+    if root_directory_path.parts[-1] == "tests":
+        root_directory_path = root_directory_path.parent
+    create_env_file(directory_path=root_directory_path)
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..266dfdc
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,7 @@
+version: "3.8"
+
+services:
+  kroki: # see https://docs.kroki.io/kroki/setup/use-docker-or-podman/#_run_multiple_kroki_containers_together
+    image: yuzutech/kroki:0.24.1
+    ports:
+      - "8126:8000"
diff --git a/env.example b/env.example
new file mode 100644
index 0000000..f3924c6
--- /dev/null
+++ b/env.example
@@ -0,0 +1,2 @@
+#Environment for Kroki
+KROKI_HOST=http://localhost:8126/
diff --git a/pyproject.toml b/pyproject.toml
index cb3da12..7522b7b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,9 +17,12 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
 ]
 dependencies = [
-    "ebdamame>=0.1.1",
-    "rebdhuhn>=0.2.3"
-] # add all the dependencies here
+    "ebdamame>=0.1.3",
+    "rebdhuhn>=0.2.3",
+    "cattrs",
+    "click",
+    # add all the dependencies here
+]
 dynamic = ["readme", "version"]
 
 [project.urls]
diff --git a/requirements.txt b/requirements.txt
index f2e0ed5..90cdf3d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
 #    pip-compile pyproject.toml
@@ -8,12 +8,9 @@ attrs==23.2.0
     # via
     #   cattrs
     #   ebdamame
-    #   ebdtable2graph
     #   rebdhuhn
 cattrs==23.2.3
-    # via
-    #   ebdtable2graph
-    #   rebdhuhn
+    # via rebdhuhn
 certifi==2024.2.2
     # via requests
 charset-normalizer==3.3.2
@@ -24,34 +21,25 @@ colorama==0.4.6
     # via click
 ebdamame==0.1.3
     # via your-favourite-package-name (pyproject.toml)
-ebdtable2graph==0.1.20
-    # via ebdamame
 idna==3.7
     # via requests
 lxml==5.2.1
     # via
-    #   ebdtable2graph
     #   python-docx
     #   rebdhuhn
     #   svgutils
 more-itertools==10.2.0
     # via ebdamame
 networkx==3.3
-    # via
-    #   ebdtable2graph
-    #   rebdhuhn
+    # via rebdhuhn
 python-docx==1.1.2
     # via ebdamame
 rebdhuhn==0.2.3
     # via your-favourite-package-name (pyproject.toml)
 requests==2.31.0
-    # via
-    #   ebdtable2graph
-    #   rebdhuhn
+    # via rebdhuhn
 svgutils==0.3.4
-    # via
-    #   ebdtable2graph
-    #   rebdhuhn
+    # via rebdhuhn
 typing-extensions==4.11.0
     # via python-docx
 urllib3==2.2.1
diff --git a/src/ebd_toolchain/main.py b/src/ebd_toolchain/main.py
new file mode 100644
index 0000000..bc5d675
--- /dev/null
+++ b/src/ebd_toolchain/main.py
@@ -0,0 +1,167 @@
+"""
+A small click-based script to extract all EBDs from a given .docx file (available at edi-energy.de).
+""" + +# invoke like this: +# main.py -i unittests/test_data/ebd20230619_v33.docx +# -o ../machine-readable_entscheidungsbaumdiagramme/FV2304 +# -t json -t dot -t svg -t puml +# +# or +# +# main.py -i unittests/test_data/ebd20230629_v34.docx +# -o ../machine-readable_entscheidungsbaumdiagramme/FV2310 +# -t json -t dot -t svg -t puml + +import json +from pathlib import Path +from typing import Literal + +import cattrs +import click +from ebdamame import TableNotFoundError, get_all_ebd_keys, get_ebd_docx_tables +from ebdamame.docxtableconverter import DocxTableConverter +from rebdhuhn.graph_conversion import convert_table_to_graph +from rebdhuhn.graphviz import convert_dot_to_svg_kroki, convert_graph_to_dot +from rebdhuhn.models.ebd_graph import EbdGraph +from rebdhuhn.models.ebd_table import EbdTable +from rebdhuhn.models.errors import ( + EbdCrossReferenceNotSupportedError, + EndeInWrongColumnError, + GraphTooComplexForPlantumlError, + NotExactlyTwoOutgoingEdgesError, + OutcomeCodeAmbiguousError, + PathsNotGreaterThanOneError, +) +from rebdhuhn.plantuml import convert_graph_to_plantuml + + +def _dump_puml(puml_path: Path, ebd_graph: EbdGraph) -> None: + plantuml_code = convert_graph_to_plantuml(ebd_graph) + with open(puml_path, "w+", encoding="utf-8") as uml_file: + uml_file.write(plantuml_code) + + +def _dump_dot(dot_path: Path, ebd_graph: EbdGraph) -> None: + dot_code = convert_graph_to_dot(ebd_graph) + with open(dot_path, "w+", encoding="utf-8") as uml_file: + uml_file.write(dot_code) + + +def _dump_svg(svg_path: Path, ebd_graph: EbdGraph) -> None: + dot_code = convert_graph_to_dot(ebd_graph) + svg_code = convert_dot_to_svg_kroki(dot_code) + with open(svg_path, "w+", encoding="utf-8") as svg_file: + svg_file.write(svg_code) + + +def _dump_json(json_path: Path, ebd_table: EbdTable) -> None: + with open(json_path, "w+", encoding="utf-8") as json_file: + json.dump(cattrs.unstructure(ebd_table), json_file, ensure_ascii=False, indent=2, sort_keys=True) + + +@click.command() +@click.option( + "-i", + "--input_path", + type=click.Path(exists=True, dir_okay=False, file_okay=True, path_type=Path), + prompt="Input DOCX File", + help="Path of a .docx file from which the EBDs shall be extracted", +) +@click.option( + "-o", + "--output_path", + type=click.Path(exists=False, dir_okay=True, file_okay=False, path_type=Path), + default="output", + prompt="Output directory", + help="Define the path where you want to save the generated files", +) +@click.option( + "-t", + "--export_types", + type=click.Choice(["puml", "dot", "json", "svg"], case_sensitive=False), + multiple=True, + help="Choose which file you'd like to create", +) +# pylint:disable=too-many-locals, too-many-branches, too-many-statements, +def main(input_path: Path, output_path: Path, export_types: list[Literal["puml", "dot", "json", "svg"]]): + """ + A program to get a machine-readable version of the AHBs docx files published by edi@energy. 
+ """ + if output_path.exists(): + click.secho(f"The output directory '{output_path}' exists already.", fg="yellow") + else: + output_path.mkdir(parents=True) + click.secho(f"Created a new directory at {output_path}", fg="green") + all_ebd_keys = get_all_ebd_keys(input_path) + error_sources: dict[type, list[str]] = {} + + def handle_known_error(error: Exception, ebd_key: str) -> None: + click.secho(f"Error while processing EBD {ebd_key}: {error}", fg="yellow") + if type(error) not in error_sources: + error_sources[type(error)] = [] + error_sources[type(error)].append(ebd_key) + + for ebd_key, (ebd_title, ebd_kapitel) in all_ebd_keys.items(): + click.secho(f"Processing EBD {ebd_kapitel} '{ebd_key}' ({ebd_title})") + try: + docx_tables = get_ebd_docx_tables(docx_file_path=input_path, ebd_key=ebd_key) + except TableNotFoundError as table_not_found_error: + click.secho(f"Table not found: {ebd_key}: {str(table_not_found_error)}; Skip!", fg="yellow") + continue + assert ebd_kapitel is not None + try: + converter = DocxTableConverter( + docx_tables, + ebd_key=ebd_key, + chapter=ebd_kapitel.chapter_title, # type:ignore[arg-type] + # pylint:disable=line-too-long + sub_chapter=f"{ebd_kapitel.chapter}.{ebd_kapitel.section}.{ebd_kapitel.subsection}: {ebd_kapitel.section_title}", + ) + ebd_table = converter.convert_docx_tables_to_ebd_table() + except Exception as scraping_error: # pylint:disable=broad-except + click.secho(f"Error while scraping {ebd_key}: {str(scraping_error)}; Skip!", fg="red") + continue + if "json" in export_types: + _dump_json(output_path / Path(f"{ebd_key}.json"), ebd_table) + click.secho(f"💾 Successfully exported '{ebd_key}.json'") + try: + ebd_graph = convert_table_to_graph(ebd_table) + except (EbdCrossReferenceNotSupportedError, EndeInWrongColumnError, OutcomeCodeAmbiguousError) as known_issue: + handle_known_error(known_issue, ebd_key) + continue + except Exception as unknown_error: # pylint:disable=broad-except + click.secho(f"Error while graphing {ebd_key}: {str(unknown_error)}; Skip!", fg="red") + continue + if "puml" in export_types: + try: + _dump_puml(output_path / Path(f"{ebd_key}.puml"), ebd_graph) + click.secho(f"💾 Successfully exported '{ebd_key}.puml'") + except AssertionError as assertion_error: + # https://github.com/Hochfrequenz/rebdhuhn/issues/35 + click.secho(str(assertion_error), fg="red") + except (NotExactlyTwoOutgoingEdgesError, GraphTooComplexForPlantumlError) as known_issue: + handle_known_error(known_issue, ebd_key) + except Exception as general_error: # pylint:disable=broad-exception-caught + click.secho(f"Error while exporting {ebd_key} as UML: {str(general_error)}; Skip!", fg="yellow") + + try: + if "dot" in export_types: + _dump_dot(output_path / Path(f"{ebd_key}.dot"), ebd_graph) + click.secho(f"💾 Successfully exported '{ebd_key}.dot'") + if "svg" in export_types: + _dump_svg(output_path / Path(f"{ebd_key}.svg"), ebd_graph) + click.secho(f"💾 Successfully exported '{ebd_key}.svg'") + except PathsNotGreaterThanOneError as known_issue: + handle_known_error(known_issue, ebd_key) + except AssertionError as assertion_error: + # e.g. AssertionError: If indegree > 1, the number of paths should always be greater than 1 too. 
+            click.secho(str(assertion_error), fg="red")
+        # both the SVG and dot path require graphviz to work, hence the common error handling block
+    click.secho(json.dumps({str(k): v for k, v in error_sources.items()}, indent=4))
+    click.secho("🏁Finished")
+
+
+if __name__ == "__main__":
+    # the parameters are provided via the CLI
+    main()  # pylint:disable=no-value-for-parameter
diff --git a/src/ebd_toolchain/mymodule.py b/src/ebd_toolchain/mymodule.py
index 2afc797..bfdf96f 100644
--- a/src/ebd_toolchain/mymodule.py
+++ b/src/ebd_toolchain/mymodule.py
@@ -1,23 +1,23 @@
-"""
-This a docstring for the module.
-"""
-
-
-class MyClass:  # pylint: disable=too-few-public-methods
-    """
-    This is a docstring for the class.
-    """
-
-    def __init__(self):
-        """
-        Initialize for the sake of initializing
-        """
-        self.my_instance_var: str = "abc"
-
-    def do_something(self) -> str:
-        """
-        Actually does nothing.
-        :return: the value of an instance variable
-        """
-        # this is a super long line with: 100 < line length <= 120 to demonstrate the purpose of pyproject.toml
-        return self.my_instance_var
+"""
+This a docstring for the module.
+"""
+
+
+class MyClass:  # pylint: disable=too-few-public-methods
+    """
+    This is a docstring for the class.
+    """
+
+    def __init__(self):
+        """
+        Initialize for the sake of initializing
+        """
+        self.my_instance_var: str = "abc"
+
+    def do_something(self) -> str:
+        """
+        Actually does nothing.
+        :return: the value of an instance variable
+        """
+        # this is a super long line with: 100 < line length <= 120 to demonstrate the purpose of pyproject.toml
+        return self.my_instance_var
diff --git a/unittests/test_myclass.py b/unittests/test_myclass.py
index 465bda0..3f7c2e8 100644
--- a/unittests/test_myclass.py
+++ b/unittests/test_myclass.py
@@ -1,11 +1,11 @@
-from ebd_toolchain.mymodule import MyClass
-
-
-class TestMyClass:
-    """
-    A class with pytest unit tests.
-    """
-
-    def test_something(self):
-        my_class = MyClass()
-        assert my_class.do_something() == "abc"
+from ebd_toolchain.mymodule import MyClass
+
+
+class TestMyClass:
+    """
+    A class with pytest unit tests.
+    """
+
+    def test_something(self):
+        my_class = MyClass()
+        assert my_class.do_something() == "abc"
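
For reference, a local run of the new toolchain could look roughly like the following sketch. It mirrors the CI steps and the invocation comments at the top of main.py; it assumes Docker is available, the dependencies from pyproject.toml are installed, and the example .docx file from the unit tests is present.

    # copy env.example to .env (KROKI_HOST=http://localhost:8126/) unless a .env file already exists
    python create_env_file.py
    # start the Kroki container from docker-compose.yaml, which renders dot code to SVG on port 8126
    docker-compose up -d
    # extract all EBDs from the .docx file and export them as json, dot, svg and puml
    python src/ebd_toolchain/main.py -i unittests/test_data/ebd20230619_v33.docx -o output -t json -t dot -t svg -t puml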