From 142b0a0dcc036b3297a180dba5ba096d9ceaafc0 Mon Sep 17 00:00:00 2001
From: monoxgas
Date: Mon, 18 Mar 2024 18:20:46 -0600
Subject: [PATCH] v0.1.0

- Workflows + Pypi
- JsonStorage
- Improved logging
---
 .vscode/settings.json                |  3 --
 marque/.github/workflows/ci.yml      | 50 ++++++++++++++++++++++++++
 marque/.github/workflows/publish.yml | 45 +++++++++++++++++++++++
 marque/__init__.py                   |  7 +++-
 marque/flow.py                       | 44 ++++++++++++++++-------
 marque/logging.py                    | 47 ++++++++++++++++++++++++
 marque/scope.py                      | 33 ++++++++++++++++-
 marque/storage.py                    | 28 +++++++++++++++
 marque/util.py                       | 54 ++++++++++------------------
 9 files changed, 259 insertions(+), 52 deletions(-)
 create mode 100644 marque/.github/workflows/ci.yml
 create mode 100644 marque/.github/workflows/publish.yml
 create mode 100644 marque/logging.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 81cee92..2bc86a6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -7,7 +7,4 @@
         },
         "editor.defaultFormatter": "charliermarsh.ruff"
     },
-    "python.testing.pytestArgs": ["tests"],
-    "python.testing.unittestEnabled": false,
-    "python.testing.pytestEnabled": true
 }
diff --git a/marque/.github/workflows/ci.yml b/marque/.github/workflows/ci.yml
new file mode 100644
index 0000000..44c45d2
--- /dev/null
+++ b/marque/.github/workflows/ci.yml
@@ -0,0 +1,50 @@
+name: Lint, Typecheck, and Test
+
+on:
+  push:
+    branches: [ main, dev ]
+  pull_request:
+    branches: [ main, dev ]
+
+jobs:
+  ci:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Setup Poetry
+        uses: abatilo/actions-poetry@v2
+
+      - name: Configure local .venv
+        run: |
+          poetry config virtualenvs.create true --local
+          poetry config virtualenvs.in-project true --local
+
+      - uses: actions/cache@v3
+        name: Cache Dependencies
+        with:
+          path: ./.venv
+          key: venv-${{ hashFiles('poetry.lock') }}
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Linting
+        run: poetry run ruff -v marque/
+
+      - name: Typecheck
+        run: poetry run mypy -v marque/
+
+      - name: Tests
+        run: poetry run pytest -v
\ No newline at end of file
diff --git a/marque/.github/workflows/publish.yml b/marque/.github/workflows/publish.yml
new file mode 100644
index 0000000..71a6ec2
--- /dev/null
+++ b/marque/.github/workflows/publish.yml
@@ -0,0 +1,45 @@
+name: Build and Publish
+
+on:
+  push:
+    branches: [ main ]
+
+jobs:
+  build-and-publish:
+    environment: protected
+    permissions:
+      contents: read
+      id-token: write
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.9'
+
+      - name: Setup Poetry
+        uses: abatilo/actions-poetry@v2
+
+      - name: Configure local .venv
+        run: |
+          poetry config virtualenvs.create true --local
+          poetry config virtualenvs.in-project true --local
+
+      - uses: actions/cache@v3
+        name: Cache Dependencies
+        with:
+          path: ./.venv
+          key: venv-${{ hashFiles('poetry.lock') }}
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Build package
+        run: poetry build
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
\ No newline at end of file
diff --git a/marque/__init__.py b/marque/__init__.py
index 5e52ed1..772f42f 100644
--- a/marque/__init__.py
+++ b/marque/__init__.py
@@ -1,5 +1,10 @@
 from marque.flow import Flow
 from marque.helpers import repeat
 from marque.scope import Scope
+from marque.storage import JsonStorage, MemoryStorage, PolarsStorage
 
-__all__ = ["Flow", "Scope", "repeat"]
+__all__ = ["Flow", "Scope", "repeat", "MemoryStorage", "PolarsStorage", "JsonStorage"]
+
+from loguru import logger
+
+logger.disable("marque")
diff --git a/marque/flow.py b/marque/flow.py
index 039eec8..37f93a8 100644
--- a/marque/flow.py
+++ b/marque/flow.py
@@ -1,15 +1,17 @@
 import inspect
 import traceback
 import typing as t
+from contextlib import ExitStack
 from dataclasses import dataclass
 from datetime import datetime
 
 import coolname  # type: ignore [import-untyped]
 from loguru import logger
 
+from marque.logging import LogLevelLiteral, configure_logging
 from marque.scope import Scope, Tag
 from marque.storage import MemoryStorage, Storage
-from marque.util import LogLevelLiteral, configure_logging, format_timedelta
+from marque.util import PrintHook, format_timedelta
 
 T = t.TypeVar("T")
 
@@ -98,7 +100,7 @@ class Step:
     func: StepFunc
     context: Context
     scope: Scope
-    logs: list[str]
+    logs: list[tuple[LogLevelLiteral, str]]
     ref_scope: Scope | None = None
 
     @property
@@ -116,7 +118,7 @@ def __repr__(self) -> str:
 
 
 class Flow:
-    def __init__(self, name: str, storage: Storage | None = None, log_level: LogLevelLiteral = "INFO"):
+    def __init__(self, name: str, storage: Storage | None = None, log_level: LogLevelLiteral = "info"):
         self.name = name
         self.run = coolname.generate_slug(2)
         self.state: t.Literal["pending", "running", "finished"] = "pending"
@@ -149,6 +151,10 @@ def scopes(self) -> list[Scope]:
 
     @property
     def logs(self) -> list[str]:
+        return [log[1] for log in self.raw_logs]
+
+    @property
+    def raw_logs(self) -> list[tuple[LogLevelLiteral, str]]:
         return [log for group in self.steps for step in group for log in step.logs]
 
     def put(self, **values: t.Any) -> "Flow":
@@ -190,6 +196,21 @@ def recall(self, name: str) -> dict[str, t.Any]:
         scope = self.current.ref_scope or self.current.scope
         return scope.recall(name)
 
+    def log(self, msg: str, level: LogLevelLiteral = "info") -> "Flow":
+        if self.current is None:
+            raise RuntimeError("Cannot log() outside of a running step.")
+        self.current.logs.append((level, msg))
+        return self
+
+    def success(self, message: str) -> None:
+        self.log(message, level="success")
+
+    def warning(self, message: str) -> None:
+        self.log(message, level="warning")
+
+    def error(self, message: str) -> None:
+        self.log(message, level="error")
+
     def tag(self, content: str, value: float | None = None) -> "Flow":
         if self.current is None:
             self.tags.append(Tag(content, value))
@@ -197,12 +218,6 @@
             self.current.scope.tag(content, value)
         return self
 
-    def log(self, msg: str) -> "Flow":
-        if self.current is None:
-            raise RuntimeError("Cannot log() outside of a running step.")
-        self.current.logs.append(msg)
-        return self
-
     def push_with_scope(self, step: StepFunc | list[StepFunc], scope: Scope | None = None, **context: t.Any) -> "Flow":
         step_objects: list[Step] = []
         for step_func in step if isinstance(step, list) else [step]:
@@ -251,6 +266,9 @@ def __call__(self) -> None:
 
         self.state = "running"
 
+        def _log(message: str) -> None:
+            self.log(message, level="info")
+
         while self.group_idx < len(self.steps):
             self.step_idx = 0
             while self.step_idx < len(self.steps[self.group_idx]):
@@ -260,7 +278,9 @@
                 start = datetime.now()
 
                 try:
-                    self.current.func(self)
+                    with ExitStack() as stack:
+                        stack.enter_context(PrintHook(_log))
+                        self.current.func(self)
                 except Exception:
                     if self.ignore_errors:
                         self.current.scope.error = traceback.format_exc()
@@ -268,8 +288,8 @@
                     else:
                         raise
 
-                for log in self.current.logs:
-                    logger.info(f" |: {log}")
+                for lvl, log in self.current.logs:
+                    logger.log(lvl.upper(), f" |: {log}")
 
                 self.current.scope.duration = datetime.now() - start
                 logger.info(f" |- in {format_timedelta(self.current.scope.duration)}")
diff --git a/marque/logging.py b/marque/logging.py
new file mode 100644
index 0000000..e618892
--- /dev/null
+++ b/marque/logging.py
@@ -0,0 +1,47 @@
+import pathlib
+import sys
+import typing as t
+
+from loguru import logger
+
+g_configured: bool = False
+
+LogLevelList = ["trace", "debug", "info", "success", "warning", "error", "critical"]
+LogLevelLiteral = t.Literal["trace", "debug", "info", "success", "warning", "error", "critical"]
+
+
+def configure_logging(
+    log_level: str,
+    log_file: pathlib.Path | None = None,
+    log_file_level: LogLevelLiteral = "debug",
+) -> None:
+    global g_configured
+
+    if g_configured:
+        return
+
+    logger.enable("marque")
+
+    logger.level("TRACE", color="", icon="[T]")
+    logger.level("DEBUG", color="", icon="[_]")
+    logger.level("INFO", color="", icon="[=]")
+    logger.level("SUCCESS", color="", icon="[+]")
+    logger.level("WARNING", color="", icon="[-]")
+    logger.level("ERROR", color="", icon="[!]")
+    logger.level("CRITICAL", color="", icon="[x]")
+
+    # Default format:
+    # "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
+    # "{level: <8} | "
+    # "{name}:{function}:{line} - {message}",
+
+    custom_format = "{time:HH:mm:ss.SSS} | {level.icon} {message}"
+
+    logger.remove()
+    logger.add(sys.stderr, format=custom_format, level=log_level.upper())
+
+    if log_file is not None:
+        logger.add(log_file, format=custom_format, level=log_file_level.upper())
+        logger.info(f"Logging to {log_file}")
+
+    g_configured = True
diff --git a/marque/scope.py b/marque/scope.py
index 1736f05..25df1b9 100644
--- a/marque/scope.py
+++ b/marque/scope.py
@@ -1,6 +1,9 @@
+import datetime
 import typing as t
 from dataclasses import dataclass
 
+from pydantic import TypeAdapter
+
 if t.TYPE_CHECKING:
     from datetime import timedelta
 
@@ -32,7 +35,35 @@ def __init__(self, id_: str):
         self.duration: timedelta | None = None
 
     def __repr__(self) -> str:
-        return f""
+        return f""
+
+    # TODO: This could probably get pulled out to the utils
+    # module and rework these as BaseModels or dataclasses
+
+    def to_json(self) -> dict[str, t.Any]:
+        artifact_adapter = TypeAdapter(Artifact)
+        tag_adapter = TypeAdapter(Tag)
+
+        return {
+            "id": self.id,
+            "tags": [tag_adapter.dump_python(tag) for tag in self.tags],
+            "artifacts": [artifact_adapter.dump_python(artifact) for artifact in self.artifacts],
+            "error": self.error,
+            "duration": self.duration.microseconds if self.duration else None,
+        }
+
+    @classmethod
+    def from_json(cls, data: dict[str, t.Any]) -> "Scope":
+        artifact_adapter = TypeAdapter(Artifact)
+        tag_adapter = TypeAdapter(Tag)
+
+        scope = cls(data["id"])
+        scope.tags = [tag_adapter.validate_python(tag) for tag in data["tags"]]
+        scope.artifacts = [artifact_adapter.validate_python(artifact) for artifact in data["artifacts"]]
+        scope.error = data["error"]
+        scope.duration = datetime.timedelta(microseconds=data["duration"]) if data["duration"] else None
+
+        return scope
 
     def keep(self, name: str, value: dict[str, t.Any], overwrite: bool = False) -> "Scope":
         if not isinstance(value, dict):
diff --git a/marque/storage.py b/marque/storage.py
index efef2f6..19c93f1 100644
--- a/marque/storage.py
+++ b/marque/storage.py
@@ -3,6 +3,7 @@
 import pathlib
 import typing as t
 
+import orjson
 import polars as pl
 
 from marque.scope import Scope
@@ -170,3 +171,30 @@ def load(self, id_: str) -> Scope:
 
     def get_ids(self) -> list[str]:
         return list(set(self.df["id"].to_list()))
+
+
+class JsonStorage(Storage):
+    def __init__(self, path: pathlib.Path | str):
+        self.path = pathlib.Path(path)
+        self.path.mkdir(parents=True, exist_ok=True)
+
+    def save(self, scope: Scope) -> None:
+        file_path = self.path / f"{scope.id}.json"
+        with file_path.open("wb") as f:
+            f.write(orjson.dumps(scope.to_json()))
+
+    def load(self, id_: str) -> Scope:
+        file_path = self.path / f"{id_}.json"
+        if not file_path.exists():
+            raise KeyError(id_)
+
+        with file_path.open("rb") as f:
+            data = t.cast(dict[str, t.Any], orjson.loads(f.read()))
+
+        return Scope.from_json(data)
+
+    def get_ids(self) -> list[str]:
+        return [file.stem for file in self.path.glob("*.json")]
+
+    def flush(self) -> None:
+        pass
diff --git a/marque/util.py b/marque/util.py
index 568af6a..f4afe8f 100644
--- a/marque/util.py
+++ b/marque/util.py
@@ -1,13 +1,13 @@
-import pathlib
-import sys
+import builtins
 import typing as t
+from contextlib import contextmanager
 from datetime import timedelta
 
 import orjson
 from pydantic import BaseModel
 
 try:
-    from pydantic import BaseModel  # type: ignore
+    from pydantic import BaseModel
 except ImportError:
     # Define a dummy BaseModel if pydantic isn't installed
     class BaseModel:  # type: ignore
@@ -27,38 +27,6 @@ def json_deserialize(obj: str) -> t.Any:
     return orjson.loads(obj)
 
 
-LogLevelList = ["TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL"]
-LogLevelLiteral = t.Literal["TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL"]
-
-
-def configure_logging(
-    log_level: str, log_file: pathlib.Path | None = None, log_file_level: LogLevelLiteral = "DEBUG"
-) -> None:
-    from loguru import logger
-
-    logger.level("TRACE", color="", icon="[T]")
-    logger.level("DEBUG", color="", icon="[_]")
-    logger.level("INFO", color="", icon="[=]")
-    logger.level("SUCCESS", color="", icon="[+]")
-    logger.level("WARNING", color="", icon="[-]")
-    logger.level("ERROR", color="", icon="[!]")
-    logger.level("CRITICAL", color="", icon="[x]")
-
-    # Default format:
-    # "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
-    # "{level: <8} | "
-    # "{name}:{function}:{line} - {message}",
-
-    custom_format = "{level.icon} {message}"
-
-    logger.remove()
-    logger.add(sys.stderr, format=custom_format, level=log_level)
-
-    if log_file is not None:
-        logger.add(log_file, format=custom_format, level=log_file_level)
-        logger.info(f"Logging to {log_file}")
-
-
 # https://stackoverflow.com/questions/538666/format-timedelta-to-string
 def format_timedelta(td_object: timedelta) -> str:
     # Convert the entire timedelta to milliseconds
@@ -83,3 +51,19 @@
         strings.append(f"{period_value}{period_name}")
 
     return " ".join(strings) if strings else "~0ms"
+
+
+@contextmanager
+def PrintHook(log_func: t.Callable[[str], None]) -> t.Iterator[None]:
+    original_print = builtins.print
+
+    def custom_print(*args: t.Any, **kwargs: t.Any) -> None:
+        log_message = " ".join(str(arg) for arg in args)
+        log_func(log_message)
+
+    builtins.print = custom_print
+
+    try:
+        yield
+    finally:
+        builtins.print = original_print
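
Usage sketch (not part of the patch): a minimal example of how the pieces introduced in this commit are intended to fit together, namely leveled Flow.log()/success()/warning()/error() entries, print() capture via PrintHook while a step runs, and JSON persistence through JsonStorage. The step function greet, the flow name "demo", and the ./runs directory are hypothetical, and the push_with_scope()/flow() call pattern is inferred from flow.py rather than taken from any documentation.

from marque import Flow, JsonStorage


def greet(flow: Flow) -> None:
    # print() output is intercepted by PrintHook while the step runs
    # and recorded as an "info"-level entry on the current step's logs.
    print("hello from a step")
    flow.log("an explicit info entry")      # defaults to level="info"
    flow.success("step finished cleanly")   # recorded at "success" level


flow = Flow("demo", storage=JsonStorage("./runs"), log_level="debug")
flow.push_with_scope(greet)
flow()  # execute the pushed step group(s)

# After the run, leveled entries are available alongside the plain messages.
for level, message in flow.raw_logs:
    print(level, message)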