From 9834b52bd243ddf1f31b9d34f59647341f9a2eb5 Mon Sep 17 00:00:00 2001 From: Wenceslao Villegas Date: Tue, 31 Aug 2021 23:32:43 +0000 Subject: [PATCH] RFC: Fix mermaid graph --- .bumpversion.cfg | 2 +- .pre-commit-config.yaml | 9 + CHANGELOG.md | 24 +- CONTRIBUTING.md | 257 +++++++++++++++++++++- README.md | 79 ++++--- documentation/source/classes.md | 3 + documentation/source/end2end.md | 21 +- documentation/source/project_structure.md | 2 +- setup.py | 2 - soam/__init__.py | 2 +- soam/models/base.py | 6 +- 11 files changed, 340 insertions(+), 67 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index a15dc63..4775e6c 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.9.3 +current_version = 0.9.4 tag = False [bumpversion:file:soam/__init__.py] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9bde437..66c2147 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,3 +51,12 @@ repos: exclude: templates/.+|notebook/.+ files: \.py$ types: [python] +- repo: https://github.com/MuttData/gfm-diagram + rev: v0.3.0 + hooks: + - id: gfmd + name: gfmd + entry: gfmd + types: [file] + files: 'README.md' + require_serial: true diff --git a/CHANGELOG.md b/CHANGELOG.md index f155a9d..e74c817 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.9.4 - 2021-08-17] + +### Added +- pre-commit hook for rendering Mermaid diagrams and making README GitHub.md compliant. + - github.com/MuttData/gfm-diagram + +### Fixed +- Mermaid rendering on README.md + +### Removed +- Removed unused click dependency + unused soam cli entrypoint.
+ ## [0.9.3 - 2021-08-13] ### Update @@ -115,9 +127,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Mlflow tracking: - - Config settings on cfg.py file. - - Use guide and example notebook. - - Mlflow Tests v0. + - Config settings on cfg.py file. + - Use guide and example notebook. + - Mlflow Tests v0. ## [0.4.1 - 2021-06-14] @@ -141,9 +153,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Time Series Extractor: - - Table mappings. - - Replace for % sign. - - Add columns_mapping docstring + - Table mappings. + - Replace for % sign. + - Add columns_mapping docstring ## [0.3.6 - 2021-05-10] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5286623..2fd7b36 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -48,6 +48,243 @@ Before submitting an issue, first check on the [issues tracker](https://gitlab.c ### Security issues If you find a security related bug or any kind of security rellated issue, **please DO NOT file a public issue**. Sensitive security-related issues should be reported to privately to the repo owner along with a PoC if possible. You can [send us an email](mailto:security@muttdata.ai) and we'll go from there. +## Development Setup +### Installation +To set up your environment and start developing check this [guide](https://gitlab.com/mutt_data/soam/-/blob/master/documentation/source/developers_starting_point.md). +### Pre-Commit for Version Control Integration + +We use [pre-commit](https://pre-commit.com) to run several code scans and hooks like linters and formatters, defined in `.pre-commit-config.yaml`, on each staged file that make the development cycle easier. + +To install pre-commit hooks run +```bash +pre-commit install +pre-commit install -t push +``` + +## Style guide +`soam` follows [PEP8](https://www.python.org/dev/peps/pep-0008/). 
+ +If you installed the [pre-commit hooks](#pre-commit) you shouldn't worry too much about style, since they will fix it for you or warn you about styling errors. We use the following hooks: + +- [black](https://github.com/psf/black): an opinionated code formatting tool that ensures consistency across all projects using it. +- [flake8](https://github.com/PyCQA/flake8): a tool to enforce style guide. +- [mypy](https://github.com/python/mypy): a static type checker for Python. +- [pylint](https://github.com/PyCQA/pylint): a source code, bug and quality checker. +- [isort](https://github.com/timothycrosley/isort): a utility to sort imports alphabetically, and automatically separated into sections and by type. + +## Docstrings +We use either [numpy style](https://numpydoc.readthedocs.io/en/latest/format.html) or [google style](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) docstring formatting. It's usually good to include the following docstrings: +- Module level docstring giving a general overview of what it does. + - It may include TODOs + - It may include examples +- Class dosctrings explaining what it is +- Method/functions to explain what it does and what it's parameters are + +## Testing +`soam` uses the [pytest framework](https://docs.pytest.org/en/latest/) to test `soam`. + +To run the default test suite run this: +```bash +pytest +``` + +Note that some tests may depend on external dependencies not installed with `[dev]` if you want to run the full set of tests use `[all]` instead, running this: +```bash +pip install -e .[all] +``` + +Run coverage: +```bash +pytest --cov-report html:cov_html --tb=short -q --cov-report term-missing --cov=. tests/ +``` +That should output a short summary and generate a dir `cov_html/` with a detailed HTML report that can be viewed by opening `index.html` in your browser. 
+ +To run the tests with [nox](https://nox.thea.codes/en/stable/): +```bash +nox --session tests +``` + +### Regression testing +[Regression testing](https://en.wikipedia.org/wiki/Regression_testing) to ensure new changes have not broken previously working features. + +## Documentation +`soam` uses [Sphinx](https://www.sphinx-doc.org/en/master/) to autogenerate it's [docs](https://mutt_data.gitlab.io/soam/) that are automatically built from [docstrings](#docstrings) and pushed by the [CI jobs](#cicd-jobs). Check the [style guide](#style-guide) section for notes on docstrings. Pushing all the docs is too cumbersome. You can generate them locally by doing: + +```bash +pip install .[all] +cd docs +make html +``` + +And open `docs/build/html/index.html` on your browser of choice. + +Alternatively you can see the docs for the `master` branch [here.](https://mutt_data.gitlab.io/soam/index.html) + +## Versioning +`soam` uses [SemVer](https://semver.org). To keep things easy, we've included [bump2version](https://github.com/c4urself/bump2version/) as a dev dependency. You can use `bump2version minor` to increase the minor number. + +Please remember to bump the version when submitting your PR! + +## Deprecation + +Before fully deprecating a feature or making a breaking change, give users a `DeprecationWarning` and enough time for them to migrate their code. + +### Decorator + +`soam` uses [deprecated](https://github.com/tantale/deprecated) decorators to implement `DeprecationWarning`. + +Add a `DeprecationWarning` considering indicate: +- How to achieve similar behavior if an alternative is available or a reason for the deprecation if no clear alternative is available. +- The versions number when the functionality was deprecated and when the EOL will be. 
+ +To do this, decorate your deprecated function with **@deprecated** decorator: + +```python +from deprecated import deprecated + + +@deprecated +def some_old_function(x, y): + return x + y +``` + +You can also decorate a class or a method: + +```python +from deprecated import deprecated + + +class SomeClass(object): + @deprecated + def some_old_method(self, x, y): + return x + y + + +@deprecated +class SomeOldClass(object): + pass +``` + +You can give a "reason" message to help the developer to choose another function/class: + +```python +from deprecated import deprecated + + +@deprecated(reason="use another function") +def some_old_function(x, y): + return x + y +``` + +### Release +Deprecation warning must be added in minor releases and EOL will be on the next major releases. + +## PRs +Also called MRs (Merge Requests) in gitlab. + +`soam` development follows a simple workflow: +- Assign yourself an issue + - If there's none, [create it](#issues) + - If you can't assign it yourself, ask someone to do it for you +- Create a new branch with a descriptive name +- Push to the remote + - Open a [WIP](#WIP) PR to allow discussion and let others know where you're at with the issue +- Work on it πŸ€“ +- When ready change the PR to [RFC](#RFC) + - Make sure you run the pipelines once the PR leaves *Draft mode*, i.e on the [Merge Result.](https://docs.gitlab.com/ee/ci/merge_request_pipelines/pipelines_for_merged_results/). +- You'll need at least one approval to merge + - Merge will be disabled if the [CI/CD pipelines are failing](#cicd-jobs) + - If you can't merge it yourself, ask your last approver to merge it + - Please squash the commits and delete the branch +- Congrats and thanks for your contribution πŸŽ‰ + +Please keep PRs minimal. Try to keep the modified files to the bare needed for the issue you are working on. This will make the PR's changes more readable and allow for a quicker interaction with reviewers. 
+ +### WIP +WIP stands for **W**ork **i**n **P**rogress. WIP PRs are not yet ready to be merged. They allow for: +- Other project members to know you are working on something +- Early feedback, e.g. if you are doing something wrong or they see a problem down the road with your approach + +You can tag a PR as WIP using the `WIP:` prefix on you PR title. + +### RFC +RFC stands for **R**equest **f**or **C**omments. It means you consider the issue is solved by the code in the PR and are asking people to review your changes. + +### CI/CD jobs + +All commits pushed to branches in pull requests will trigger CI jobs that install `soam` in a gitlab-provided docker-env and all the extras, run all tests and check for linting. Look at [.gitlab-ci.yml](.gitlab-ci.yml) for more details on this and as well as the official [docs](https://docs.gitlab.com/ce/ci/README.html). Note that only PRs that pass the CI will be allowed to merge. + +`NOTE:` If your commit message contains [ci skip] or [skip ci], without capitalization, the job will be skipped i.e. no CI job will be spawned for that push. + +Alternatively, one can pass the ci.skip Git push option if using Git 2.10 or newer: `git push -o ci.skip` more info in [here](https://docs.gitlab.com/ce/ci/yaml/README.html#skipping-builds). + +`IMPORTANT:`. If you skip the CI job it will not disable the option to do the merge, be careful when doing this. + +**Important note on coverage:** A regex that captures the output from `pytest-cov` has been set from Settings -> CI/CD -> General Pipelines -> Test coverage parsing + +## Rules of Thumb +- Important changes should be mentioned in the [README.md](README.md) +- Documentation must be updated. +- Every change should be present in the [CHANGELOG.md](CHANGELOG.md) +![Mermaid diagram](https://kroki.io/mermaid/svg/eNoDAAAAAAE=) + +
+Diagram source code + +```mermaid + +``` +
+ + soam +Thanks for your interest in contributing to `soam` 🎉. These are the guidelines for contributions. Reading them will help you get started on how to make useful contributions. + +## Foreword +This guide is not final. It will evolve over time, as we learn and add new voices to the project. Check it from time to time and feel free to make suggestions 😃 + +## Table of Contents +- [Code of Conduct](#code-of-conduct) +- [Issues](#issues) + - [Labels](#labels) + - [Security issues](#security-issues) +- [Development Setup](#development-setup) + - [Installation](#installation) + - [Pre-Commit for Version Control Integration](#pre-commit-for-version-control-integration) +- [Style guide](#style-guide) +- [Docstrings](#docstrings) +- [Testing](#testing) + - [Regression testing](#regression-testing) +- [Documentation](#documentation) +- [Versioning](#versioning) +- [Deprecation](#deprecation) + - [Decorator](#decorator) + - [Release](#release) +- [PRs](#prs) + - [WIP](#wip) + - [RFC](#rfc) + - [CI/CD jobs](#cicd-jobs) +- [Rules of Thumb](#rules-of-thumb) + +## Code of Conduct +One of our core values at Mutt is that **we are an open team**. We all make mistakes and need help fixing them. We foster psychological safety. We clearly express it when we don’t know something and ask for advice. + +We expect everyone contributing to `soam` to follow this principle. Be kind, don't be rude, keep it friendly; learn, teach, ask and help. + +## Issues + +Before submitting an issue, first check on the [issues tracker](https://gitlab.com/mutt_data/soam/issues) if there is already one trying to cover that topic, to avoid duplicates. Otherwise we invite you to create it.
And if you feel that your issue can be categorized you can use these labels: + +### Labels + +| name | description | shortcuts | +| ---------- | ----------- | ------ | +| `bug` | Report a bug | [Look](https://gitlab.com/mutt_data/soam/issues?scope=all&utf8=✓&state=opened&label_name[]=bug) for `bug` or [create](https://gitlab.com/mutt_data/soam/-/issues/new?issuable_template=Bug) one +|`feature-request`|Request for a new feature|[Look](https://gitlab.com/mutt_data/soam/issues?scope=all&utf8=✓&state=opened&label_name[]=feature-request) for `feature-request` or [create](https://gitlab.com/mutt_data/soam/-/issues/new?issuable_template=Feature) one +|`enhancement`|Propose an enhancement|[Look](https://gitlab.com/mutt_data/soam/issues?scope=all&utf8=✓&state=opened&label_name[]=enhancement) for `enhancement` or [create](https://gitlab.com/mutt_data/soam/-/issues/new?issuable_template=Enhancement) one +|`discussion`|Start a new discussion|[Look](https://gitlab.com/mutt_data/soam/issues?scope=all&utf8=✓&state=opened&label_name[]=discussion) for `discussion` or [create](https://gitlab.com/mutt_data/soam/-/issues/new?issuable_template=Discussion) one + +### Security issues +If you find a security related bug or any kind of security related issue, **please DO NOT file a public issue**. Sensitive security-related issues should be reported privately to the repo owner along with a PoC if possible. You can [send us an email](mailto:security@muttdata.ai) and we'll go from there. ## Development Setup ### Installation @@ -99,8 +336,8 @@ If you installed the [pre-commit hooks](#pre-commit) you shouldn't worry too muc ## Docstrings We use either [numpy style](https://numpydoc.readthedocs.io/en/latest/format.html) or [google style](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) docstring formatting. It's usually good to include the following docstrings: - Module level docstring giving a general overview of what it does.
- - It may include TODOs - - It may include examples + - It may include TODOs + - It may include examples - Class dosctrings explaining what it is - Method/functions to explain what it does and what it's parameters are @@ -172,8 +409,8 @@ Before fully deprecating a feature or making a breaking change, give users a `De SoaM uses [deprecated](https://github.com/tantale/deprecated) decorators to implement `DeprecationWarning`. Add a `DeprecationWarning` considering indicate: - - How to achieve similar behavior if an alternative is available or a reason for the deprecation if no clear alternative is available. - - The versions number when the functionality was deprecated and when the EOL will be. +- How to achieve similar behavior if an alternative is available or a reason for the deprecation if no clear alternative is available. +- The versions number when the functionality was deprecated and when the EOL will be. To do this, decorate your deprecated function with **@deprecated** decorator: @@ -222,18 +459,18 @@ Also called MRs (Merge Requests) in gitlab. SoaM development follows a simple workflow: - Assign yourself an issue - - If there's none, [create it](#issues) - - If you can't assign it yourself, ask someone to do it for you + - If there's none, [create it](#issues) + - If you can't assign it yourself, ask someone to do it for you - Create a new branch with a descriptive name - Push to the remote - - Open a [WIP](#WIP) PR to allow discussion and let others know where you're at with the issue + - Open a [WIP](#WIP) PR to allow discussion and let others know where you're at with the issue - Work on it πŸ€“ - When ready change the PR to [RFC](#RFC) - Make sure you run the pipelines once the PR leaves *Draft mode*, i.e on the [Merge Result.](https://docs.gitlab.com/ee/ci/merge_request_pipelines/pipelines_for_merged_results/). 
- You'll need at least one approval to merge - - Merge will be disabled if the [CI/CD pipelines are failing](#cicd-jobs) - - If you can't merge it yourself, ask your last approver to merge it - - Please squash the commits and delete the branch + - Merge will be disabled if the [CI/CD pipelines are failing](#cicd-jobs) + - If you can't merge it yourself, ask your last approver to merge it + - Please squash the commits and delete the branch - Congrats and thanks for your contribution πŸŽ‰ Please keep PRs minimal. Try to keep the modified files to the bare needed for the issue you are working on. This will make the PR's changes more readable and allow for a quicker interaction with reviewers. diff --git a/README.md b/README.md index d8801bf..dfa38f0 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,14 @@ projects. There come the name: Son of a Mutt = SoaM ## SoaM pipeline -```mermaid + +![Mermaid diagram](https://kroki.io/mermaid/svg/eNpljbEOgjAURXe_4o0wEBQQnJzUxEQSIm6EocITmwAlrx1MCP-uloIkdnu959xbEeuecLmu4PN4uc6sA1PsziTC2c4dZ89LL3NTwWK48QYhReIo4fhSxAolyM2NuVmas2pCT1_-VESslQ9BDf50XxOBIU6CsGBSLYBt30-_vK0gFiXWwzBaBgn0FWaW7kgIS14oLlppTyWhJqK_FUhqoZZrI7gz4BU7Qd_ZOY_G_A05RWCs) + +
+Diagram source code - graph LR +```mermaid +graph LR id0[(Database I)]-->id2[/SoaM Time Series Extractor/] id1[(Database II)]-->id2 id2-->id3[/SoaM Transformer/] @@ -21,14 +26,15 @@ projects. There come the name: Son of a Mutt = SoaM id6-->id8[/SoaM Reporting/] id7-->id8 ``` - +
+ This library pipeline supports any data source. The process is structured in different stages: * Extraction: manages the granularity and aggregation of the input data. * Preprocessing: lets select among out of the box tools to perform standard tasks as normalization or fill nan values. * Forecasting: fits a model and predict results. * Postprocessing: modifies the results based on business/real information or create analysis with the predicted values, - such as an anomaly detection. +such as an anomaly detection. ## Overview of the Steps Run in SoaM @@ -43,8 +49,6 @@ This step implements functions to further cleanup and prepare the data for the f * Apply value normalizations * Shift values -[//comment]: # (the preprocessing supports custom transformations?) - ### Forecasting This stage receives the clean data, performs the forecast and store the predicted values in the defined storages. Currently there are implementations to store in CSV files and SQL databases. @@ -63,26 +67,37 @@ To do backtesting the data is splited in train and validation, there are two spl For more information review this document: [backtesting at scale](https://eng.uber.com/backtesting-at-scale/) ### Postprocessing -[//comment]: # (TODO: explain postprocessing stage chaining) This last stage is prepared to work on the forecasts generated by the pipeline. For example: * Clip/Cleanup the predictions. * Perform further analyses (such as anomaly detection). * Export reports. -[//comment]: # (does the postprocessing support custom outputs?) 
- ## Table of Contents -- [Installation](#installation) -- [Quick start](#quick-start) -- [Usage](#usage) -- [Soam CLI](#soam-cli) -- [Database Management](#database-management) -- [Developers Guide](#developers-guide) -- [Testing](#testing) -- [Contributing](#contributing) -- [Rules of Thumb](#rules-of-thumb) -- [Credits](#contributing) -- [License](#license) +- [SoaM](#soam) + - [SoaM pipeline](#soam-pipeline) + - [Overview of the Steps Run in SoaM](#overview-of-the-steps-run-in-soam) + - [Extraction](#extraction) + - [Preprocessing](#preprocessing) + - [Forecasting](#forecasting) + - [Backtesting](#backtesting) + - [Window policies](#window-policies) + - [Postprocessing](#postprocessing) + - [Table of Contents](#table-of-contents) + - [Installation](#installation) + - [Install extras](#install-extras) + - [Quick start](#quick-start) + - [Usage](#usage) + - [Database management](#database-management) + - [Alembic](#alembic) + - [Developers guide](#developers-guide) + - [Testing](#testing) + - [Testing data extraction](#testing-data-extraction) + - [Testing plots](#testing-plots) + - [Contributing](#contributing) + - [CI](#ci) + - [Rules of Thumb](#rules-of-thumb) + - [Credits](#credits) + - [License](#license) ## Installation Install the base lib via [pipy](https://pypi.org/project/soam/) by executing: @@ -109,9 +124,9 @@ pip install -e ".[report]" # slack and *_report extras pip install -e ".[all]" # all previous ``` -_Note_: The `pdf_report` extra might need to run the following command before installation ([More info](https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex)) +*Note*: The `pdf_report` extra might need to run the following command before installation ([More info](https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex)) - $ `apt-get install texlive-xetex texlive-fonts-recommended libpoppler-cpp-dev` +$ `apt-get install texlive-xetex texlive-fonts-recommended libpoppler-cpp-dev` ## Quick start 
[Here](https://gitlab.com/mutt_data/soam/-/blob/master/notebook/examples/quickstart.ipynb) is an example for a quick start into SoaM. In it a time series with AAPL stock prices is loaded, processed and forecasted. As well, there's [other example](https://gitlab.com/mutt_data/soam/-/blob/master/notebook/examples/soamflowrun.ipynb) with the same steps, but exploding the power of flows. @@ -151,10 +166,6 @@ The second command will use this file to impact the changes in the database. For more alembic commands visit the [documentation](https://alembic.sqlalchemy.org/en/latest/) -[//comment]: # (TODO: add documentation about parameter and step logging.) -[//comment]: # (TODO: split and reorder documentation.) - - ## Developers guide If you are going to develop SoaM, you should checkout the documentation directory before adding code, you can start in the [project structure document](https://mutt_data.gitlab.io/soam/project_structure.html). @@ -191,7 +202,6 @@ TEST_DB_CONNSTR="postgresql://soam:soam@localhost/soam" pytest -v tests/test_fil Note that even though the example has a DB name during the tests a new database is created and dropped to ensure that no state is maintened between runs. - ### Testing plots To generate images for testing we use [pytest-mpl](https://github.com/matplotlib/pytest-mpl) as follows: @@ -214,17 +224,14 @@ In the project root directory, there is a noxfile.py file defining all the jobs, You can run all the jobs with the command `nox`, from the project root directory or run just one job with `nox --session test` command, for example. -[//comment]: # (TODO: Link or explain how to run test and check locally) -[//comment]: # (TODO: Review the following CI explanation) - The .gitlab-ci.yml file configures the gitlab CI to run nox. Nox let us execute some test and checks before making the commit. 
We are using: * Linting job: - * [isort](https://pycqa.github.io/isort/) to reorder imports - * [pylint](https://github.com/PyCQA/pylint) to be pep8 compliant - * [black](https://github.com/psf/black) to format for code conventions - * [mypy](http://mypy-lang.org/) for static type checking + * [isort](https://pycqa.github.io/isort/) to reorder imports + * [pylint](https://github.com/PyCQA/pylint) to be pep8 compliant + * [black](https://github.com/psf/black) to format for code conventions + * [mypy](http://mypy-lang.org/) for static type checking * [bandit](https://bandit.readthedocs.io/en/latest/) for security checks * [pytest](https://docs.pytest.org/) to run all the tests in the test folder. * [pyreverse](https://pythonhosted.org/theape/documentation/developer/explorations/explore_graphs/explore_pyreverse.html) to create diagrams of the project @@ -238,12 +245,11 @@ Gitlab cache policy: * `push`: push the created files to the cloud. * `pull-push`: pull the cached files and push the newly created files. - ## Rules of Thumb This section contains some recommendations when working with SoaM to avoid common mistakes: * When possible reuse objects to preserve their configuration. - Eg: Transformations, forecasters, etc. +Eg: Transformations, forecasters, etc. * Use the same train-test windows when backtesting and training to deploy and on later usage. ## Credits @@ -261,3 +267,4 @@ Wenceslao Villegas ## License `soam` is licensed under the [Apache License 2.0](https://gitlab.com/mutt_data/muttlib/-/blob/master/LICENCE). + diff --git a/documentation/source/classes.md b/documentation/source/classes.md index 7e21fff..c9822b4 100644 --- a/documentation/source/classes.md +++ b/documentation/source/classes.md @@ -17,3 +17,6 @@ Prefect Task and Flow states when they are updated. 
##### Forecasting class diagram ![forecaster](../images/Forecaster_class_diagram.png) +https://gitlab.com/mutt_data/onboarding/-/blob/master/docs/modern_python_apps.md#documentation + +[//comment]: # (TODO: create some flow and class diagrams, some expected or possible architecture implementations.) diff --git a/documentation/source/end2end.md b/documentation/source/end2end.md index 958aa86..c2c0aff 100644 --- a/documentation/source/end2end.md +++ b/documentation/source/end2end.md @@ -4,6 +4,12 @@ The purpose of this document is to show how a generic end to end data product wo Let's imagine you need to run a process everyday that consists on loading the daily ABT, querying your database, transforming some datapoints, forecasting on a desired timeframe, plotting the results and sharing it by slack with your workteam. After that, promote results to the production environment. The diagram bellow show this process: + +![Mermaid diagram](https://kroki.io/mermaid/svg/eNqNUU1rwzAMvfdX6Lgduv6AQCGjbAw66JrdTDGq7bZmjp3ZCqHQHz856QfpLr2Yp-cn6UnaudCpA0aC5bqYAETspEZC8bTGDhaMnjfT6dwF1Mxbd5S4JdaldruP2BxgUb5zyETAWlqfCL0ynNHEUAcykoJkqMVqiGFtUusoAQVgSreKbPCboq8x7sIppsFowJsOsiewHtCjO5JVCRav2dioragCfsLHOToXvTr9J61v0l4J8NuaeLR-L_VWfGUM5V0_iujTLsS6V7GpJlhPSXxf-H5nA5n1TBmFiVgu3m44fzUuUM-vzqCfh29h2KVMDtWPqHJ43RnPX2V6sGu8nlxetHHHlxSzcgCzzct0fkrqYHTrTDqNN1s8kDHa1iMJdwcv_gA_Ldj4) + +
+Diagram source code + ```mermaid flowchart LR; raw_data[(Raw Data)]-->load_daily_abt @@ -18,7 +24,8 @@ flowchart LR; airflow[/Airflow/].->|schedules|soam_instance; airflow[/Airflow/].->|schedules|promote_to_prod; ``` - +
+ In this case, SoaM and Airflow will be interacting elbow to elbow to get this running. It's important to understand the distinctions between them. **SoaM will be you internal workflow manager, while Airflow will be your external manager**. Airflow will be in charge of scheduling all of your desired tasks through a DAG and retrying if an issue arises. Meanwhile SoaM, as your internal workflow manager, is the one in charge of managing our Python logic to carry out the desired steps mentioned before. @@ -39,13 +46,9 @@ The key here is that Airflow takes care of scheduling on a defined basis (hourly Once you have your data stored in your database, its time for SoaM to come into the scene. 1. Firstly, `TimeSeriesExtractor` will come into action by querying the data needed from your database and returning a ready to work Pandas `DataFrame`. - -2. Then, after you have your `DataFrame`, it's time for the `Transformer`. With the toolkit provided by this module you will be able to apply any SciKit-Learn transformation or even create a custom one for your specific use case. - -3. Thirdly, and once the data is fully cleaned, `Forecaster` offers you the ability to apply different Machine Learning algortihms on your data such as [FBProphet](https://facebook.github.io/prophet/) or [Orbit](https://github.com/uber/orbit) or again a custom one authered by the user to forecast your time-series on a desired time-frame. - -4. Last but not least, it's time to plot and see the results! Here is when the `ForecastPlotter` appears and generates a beautiful plot where you will see your past data and the forecast produced in the previous step. - -5. Finally, the `Reporting` module provides tools to generate and share reports with your team or friends via Google Sheets, Email, PDF and/or Slack. +1. Then, after you have your `DataFrame`, it's time for the `Transformer`. 
With the toolkit provided by this module you will be able to apply any SciKit-Learn transformation or even create a custom one for your specific use case. +1. Thirdly, and once the data is fully cleaned, `Forecaster` offers you the ability to apply different Machine Learning algorithms on your data such as [FBProphet](https://facebook.github.io/prophet/) or [Orbit](https://github.com/uber/orbit) or again a custom one authored by the user to forecast your time-series on a desired time-frame. +1. Last but not least, it's time to plot and see the results! Here is when the `ForecastPlotter` appears and generates a beautiful plot where you will see your past data and the forecast produced in the previous step. +1. Finally, the `Reporting` module provides tools to generate and share reports with your team or friends via Google Sheets, Email, PDF and/or Slack. To see how some of this can be easily implemented, check our [quickstart](notebook/examples/quickstart.ipynb)! diff --git a/documentation/source/project_structure.md b/documentation/source/project_structure.md index 6e73561..a39b7aa 100644 --- a/documentation/source/project_structure.md +++ b/documentation/source/project_structure.md @@ -53,4 +53,4 @@ Configures the alembic library to manage the migrations for the DBSaver. ### [resources](https://gitlab.com/mutt_data/soam/-/tree/master/soam/resources) Files that will be used in the pipeline, like a template for an email report in a postprocess step. -[//comment]: # (TODO: review if this directory is outdated or not used any more.) +[//comment]: # (TODO: review if this directory is outdated or not used any more.)
\ No newline at end of file diff --git a/setup.py b/setup.py index ef094c6..236097d 100644 --- a/setup.py +++ b/setup.py @@ -93,7 +93,6 @@ "python-decouple", "prefect==0.14.17", "filelock", - "click", "wheel", "muttlib>=1.1.2,<2", "numpy>=1.19,<1.20", @@ -101,6 +100,5 @@ ], extras_require=extra_dependencies, python_requires="~=3.6", - entry_points={'console_scripts': ['soam = soam.console:cli']}, ) # TODO: check why 'python setup.py develop' is failing to obtain muttlib, but 'pip install -e .' is working diff --git a/soam/__init__.py b/soam/__init__.py index f7baad6..c9f27ac 100644 --- a/soam/__init__.py +++ b/soam/__init__.py @@ -1,3 +1,3 @@ """Version.""" -__version__ = '0.9.3' +__version__ = '0.9.4' diff --git a/soam/models/base.py b/soam/models/base.py index d09d1e3..74de6d7 100644 --- a/soam/models/base.py +++ b/soam/models/base.py @@ -1,4 +1,8 @@ -"""Wrapping functions for SkLearn API.""" +""" +Wrapping functions for SkLearn API. + +This is inspired by wrappers developed in https://github.com/heidelbergcement/hcrystalball. +""" import abc from abc import abstractmethod import inspect