From 3bd28dfd7e510ce188c698c6300bd492e7e0f155 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 16:51:34 +0100 Subject: [PATCH 1/8] Update pre-commit versions --- .pre-commit-config.yaml | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7586b2d..b665adc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,39 +1,33 @@ repos: -- repo: https://github.com/asottile/seed-isort-config - rev: v1.9.4 - hooks: - - id: seed-isort-config -- repo: https://github.com/pre-commit/mirrors-isort - rev: v4.3.21 +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 hooks: - id: isort - repo: https://github.com/ambv/black - rev: 19.10b0 + rev: 22.10.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.9 +- repo: https://github.com/pycqa/flake8 + rev: 6.0.0 hooks: - id: flake8 - additional_dependencies: ['flake8-coding==1.3.2', 'flake8-copyright==0.2.2', 'flake8-debugger==3.2.1', 'flake8-mypy==17.8.0'] + additional_dependencies: ['flake8-coding==1.3.2', 'flake8-copyright==0.2.3', 'flake8-debugger==4.1.2', 'flake8-mypy==17.8.0'] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.5.0 + rev: v4.3.0 hooks: - - id: trailing-whitespace + - id: check-json + - id: check-merge-conflict - id: check-yaml + - id: debug-statements - id: mixed-line-ending - - id: name-tests-test - args: ['--django'] - id: requirements-txt-fixer + - id: trailing-whitespace - repo: https://github.com/codespell-project/codespell - rev: v1.16.0 + rev: v2.2.2 hooks: - id: codespell exclude_types: [json] args: ['--ignore-words-list=feld'] - repo: meta hooks: - - id: check-hooks-apply - id: check-useless-excludes -default_language_version: - python: python3.7 From 017f716662dec56d07e47531b0c390101566b058 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 16:51:47 +0100 Subject: [PATCH 2/8] Fix requirements_dev.txt --- requirements_dev.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index a62f093..3acf7a9 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -5,9 +5,9 @@ coverage==4.5.4 flake8==3.7.8 mlflow pip==19.2.3 - pytest==4.6.5 -pytest-runner==5.1Sphinx==1.8.5 +pytest-runner==5.1 +Sphinx==1.8.5 tabulate tox==3.14.0 tqdm From 4c1e1e58cd0459b8ea0531bee6caec25f792b905 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 17:17:46 +0100 Subject: [PATCH 3/8] Fix flake-8 version to workaround https://github.com/savoirfairelinux/flake8-copyright/issues/19 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b665adc..f45f858 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 5.0.4 hooks: - id: flake8 additional_dependencies: ['flake8-coding==1.3.2', 'flake8-copyright==0.2.3', 'flake8-debugger==4.1.2', 'flake8-mypy==17.8.0'] From f57c92d4c01017be03385401719fbae574032179 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 17:18:04 +0100 Subject: [PATCH 4/8] Update Minio based example --- examples/log-artifacts-minio/Dockerfile | 2 +- examples/log-artifacts-minio/run.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/log-artifacts-minio/Dockerfile b/examples/log-artifacts-minio/Dockerfile index fd47af9..53609ba 100644 --- a/examples/log-artifacts-minio/Dockerfile +++ b/examples/log-artifacts-minio/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7.0 +FROM python:3.8.15 RUN mkdir /mlflow/ RUN mkdir -p /mlflow/fileStore diff --git a/examples/log-artifacts-minio/run.py b/examples/log-artifacts-minio/run.py index e0d60e7..ddf11bb 100644 --- a/examples/log-artifacts-minio/run.py +++ b/examples/log-artifacts-minio/run.py @@ -5,7 +5,7 @@ import mlflow from minio import Minio -from minio.error import BucketAlreadyOwnedByYou +from minio.error import S3Error os.environ.update( { @@ -25,8 +25,11 @@ try: minioClient.make_bucket("mlflow") -except BucketAlreadyOwnedByYou as err: - print(err) +except S3Error as err: + if err.code == "BucketAlreadyOwnedByYou": + pass + else: + raise policy = { "Statement": [ @@ -72,3 +75,5 @@ # Log some directories too mlflow.log_artifacts("files/") + +print("DONE") From 6bb4d7ef3715426bcb73cef39ac56e7ef34a83c3 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 17:55:12 +0100 Subject: [PATCH 5/8] Fix support for MLFlow 2.0.1 and fix linting --- .flake8 | 4 +- .pre-commit-config.yaml | 1 + comet_for_mlflow/comet_for_mlflow.py | 73 ++++++++++++++++---------- comet_for_mlflow/compat.py | 76 ++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 28 deletions(-) create mode 100644 comet_for_mlflow/compat.py diff --git a/.flake8 b/.flake8 index 0677bf3..ad98539 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] -exclude = .git,__pycache__,data,tools -ignore = E101, E111, E114, E115, E116, E117, E121, E122, E123, E124, E125, E126, E127, E128, E129, E131, E133, E2, E3, E5, E501, E701, E702, E703, E704, W1, W2, W3, W503, W504 +max-line-length = 100 +extend-ignore = E203 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f45f858..36915ce 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,6 +11,7 @@ repos: rev: 5.0.4 hooks: - id: flake8 + args: ['--config=.flake8'] additional_dependencies: ['flake8-coding==1.3.2', 'flake8-copyright==0.2.3', 'flake8-debugger==4.1.2', 'flake8-mypy==17.8.0'] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.3.0 diff --git a/comet_for_mlflow/comet_for_mlflow.py b/comet_for_mlflow/comet_for_mlflow.py index 86025bb..8f132bb 100644 --- a/comet_for_mlflow/comet_for_mlflow.py +++ b/comet_for_mlflow/comet_for_mlflow.py @@ -41,13 +41,19 @@ from comet_ml.exceptions import CometRestApiException from comet_ml.offline import upload_single_offline_experiment from mlflow.entities.run_tag import RunTag -from mlflow.entities.view_type import ViewType from mlflow.tracking import _get_store from mlflow.tracking._model_registry.utils import _get_store as get_model_registry_store from mlflow.tracking.registry import UnsupportedModelRegistryStoreURIException from tabulate import tabulate from tqdm import tqdm +from .compat import ( + get_artifact_repository, + get_mlflow_model_name, + get_mlflow_run_id, + search_mlflow_store_experiments, + search_mlflow_store_runs, +) from .file_writer import JsonLinesFile from .utils import ( get_comet_project_name, @@ -65,18 +71,10 @@ pass -try: - # MLFLOW version 1.4.0 - from mlflow.store.artifact.artifact_repository_registry import ( - get_artifact_repository, - ) -except ImportError: - # MLFLOW version < 1.4.0 - from mlflow.store.artifact_repository_registry import get_artifact_repository - logging.basicConfig(level=logging.INFO, format="%(message)s") LOGGER = logging.getLogger() + # Install a global exception hook def except_hook(exc_type, exc_value, exc_traceback): Reporting.report( @@ -137,8 +135,7 @@ def __init__( except UnsupportedModelRegistryStoreURIException: self.model_registry_store = None - # Most of list_experiments returns a list anyway - self.mlflow_experiments = list(self.store.list_experiments()) + self.mlflow_experiments = search_mlflow_store_experiments(self.store) self.len_experiments = len(self.mlflow_experiments) # We start counting at 0 self.summary = { @@ -239,22 +236,28 @@ def prepare(self): LOGGER.info("") LOGGER.info( - "If you need support, you can contact us at http://chat.comet.ml/ or https://comet.ml/docs/quick-start/#getting-support" + """If you need support, you can contact us at http://chat.comet.ml/""" + """ or https://comet.ml/docs/quick-start/#getting-support""" ) LOGGER.info("") def prepare_mlflow_exp( - self, exp, + self, + exp, ): - runs_info = self.store.list_run_infos(exp.experiment_id, ViewType.ALL) + runs_info = search_mlflow_store_runs(self.store, exp.experiment_id) len_runs = len(runs_info) for run_number, run_info in enumerate(runs_info): try: - run_id = run_info.run_id + run_id = get_mlflow_run_id(run_info) + run = self.store.get_run(run_id) LOGGER.info( - "## Preparing run %d/%d [%s]", run_number + 1, len_runs, run_id, + "## Preparing run %d/%d [%s]", + run_number + 1, + len_runs, + run_id, ) LOGGER.debug( "## Preparing run %d/%d: %r", run_number + 1, len_runs, run @@ -410,15 +413,25 @@ def prepare_single_mlflow_run(self, run, original_experiment_name): break if matching_model: + model_name = get_mlflow_model_name(matching_model) + + prefix = "models/" + if artifact_path.startswith(prefix): + comet_artifact_path = artifact_path[len(prefix) :] + else: + comet_artifact_path = artifact_path + json_writer.log_artifact_as_model( local_artifact_path, - artifact_path, + comet_artifact_path, run_start_time, - matching_model.registered_model.name, + model_name, ) else: json_writer.log_artifact_as_asset( - local_artifact_path, artifact_path, run_start_time, + local_artifact_path, + artifact_path, + run_start_time, ) return self.compress_archive(run.info.run_id) @@ -438,12 +451,15 @@ def upload(self, prepared_data): project_note = experiment.tags.get("mlflow.note.content", None) if project_note: note_template = ( - u"/!\\ This project notes has been copied from MLFlow. It might be overwritten if you run comet_for_mlflow again/!\\ \n%s" + "/!\\ This project notes has been copied from MLFlow." + " It might be overwritten if you run comet_for_mlflow again/!\\ \n%s" % project_note ) # We don't support Unicode project notes yet self.api_client.set_project_notes( - self.workspace, project_name, note_template, + self.workspace, + project_name, + note_template, ) all_project_names.append(project_name) @@ -487,7 +503,8 @@ def upload(self, prepared_data): LOGGER.info("\t- %s", url) LOGGER.info( - "Get deeper instrumentation by adding Comet SDK to your project: https://comet.ml/docs/python-sdk/mlflow/" + "Get deeper instrumentation by adding Comet SDK to your project:" + " https://comet.ml/docs/python-sdk/mlflow/" ) LOGGER.info("") @@ -598,11 +615,13 @@ def create_or_login(self): Reporting.report("mlflow_new_user", api_key=new_account["apiKey"]) LOGGER.info( - "A Comet.ml account has been created for you and an email was sent to you to setup your password later." + "A Comet.ml account has been created for you and an email was sent to" + " you to setup your password later." ) save_api_key(new_account["apiKey"]) LOGGER.info( - "Your Comet API Key has been saved to ~/.comet.ini, it is also available on your Comet.ml dashboard." + "Your Comet API Key has been saved to ~/.comet.ini, it is also" + " available on your Comet.ml dashboard." ) return ( new_account["apiKey"], @@ -610,7 +629,9 @@ def create_or_login(self): ) else: LOGGER.info( - "An account already exists for this account, please input your API Key below (you can find it in your Settings page, https://comet.ml/docs/quick-start/#getting-your-comet-api-key):" + "An account already exists for this account, please input your API Key" + " below (you can find it in your Settings page," + " https://comet.ml/docs/quick-start/#getting-your-comet-api-key):" ) api_key = input("API Key: ") diff --git a/comet_for_mlflow/compat.py b/comet_for_mlflow/compat.py new file mode 100644 index 0000000..8ab2619 --- /dev/null +++ b/comet_for_mlflow/compat.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2020 Comet.ml Team. +# +# This file is part of Comet-For-MLFlow +# (see https://github.com/comet-ml/comet-for-mlflow). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +""" +Contains code to support multiple versions of MLFlow +""" +from mlflow.entities.view_type import ViewType + +try: + # MLFLOW version 1.4.0 + from mlflow.store.artifact.artifact_repository_registry import ( # noqa + get_artifact_repository, + ) +except ImportError: + # MLFLOW version < 1.4.0 + from mlflow.store.artifact_repository_registry import ( # noqa + get_artifact_repository, + ) + + +def search_mlflow_store_experiments(mlflow_store): + if hasattr(mlflow_store, "search_experiments"): + # MLflow supports search for up to 50000 experiments, defined in + # mlflow/store/tracking/__init__.py + mlflow_experiments = mlflow_store.search_experiments(max_results=50000) + # TODO: Check if there are more than 50000 experiments + return list(mlflow_experiments) + else: + return list(mlflow_store.list_experiments()) + + +def search_mlflow_store_runs(mlflow_store, experiment_id): + if hasattr(mlflow_store, "search_runs"): + # MLflow supports search for up to 50000 experiments, defined in + # mlflow/store/tracking/__init__.py + return mlflow_store.search_runs( + [experiment_id], + filter_string="", + run_view_type=ViewType.ALL, + max_results=50000, + ) + else: + return mlflow_store.list_run_infos(experiment_id, ViewType.ALL) + + +def get_mlflow_run_id(mlflow_run): + if hasattr(mlflow_run, "info"): + return mlflow_run.info.run_id + else: + return mlflow_run.run_id + + +def get_mlflow_model_name(mlflow_model): + if hasattr(mlflow_model, "name"): + return mlflow_model.name + else: + return mlflow_model.registered_model.name From db8a523890d199c80bea9fe166ce03eed65e1df3 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 17:55:44 +0100 Subject: [PATCH 6/8] Fix potential race condition when writing the Zip file --- comet_for_mlflow/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comet_for_mlflow/utils.py b/comet_for_mlflow/utils.py index 04d9934..a93b86f 100644 --- a/comet_for_mlflow/utils.py +++ b/comet_for_mlflow/utils.py @@ -100,6 +100,7 @@ def write_comet_experiment_metadata_file( zipfile = ZipFile(archive_path, "a") zipfile.writestr("experiment.json", json.dumps(data)) + zipfile.close() def save_api_key(api_key): From 0a1dedec0ca5d08695e8805ccf56e77300e3990e Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 18:12:22 +0100 Subject: [PATCH 7/8] Fix lint --- comet_for_mlflow/cli.py | 14 ++++++++++---- comet_for_mlflow/file_writer.py | 6 ++---- tests/test_comet_for_mlflow.py | 5 ++++- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/comet_for_mlflow/cli.py b/comet_for_mlflow/cli.py index 7468efb..75c7b32 100644 --- a/comet_for_mlflow/cli.py +++ b/comet_for_mlflow/cli.py @@ -42,17 +42,22 @@ def main(): "--no-upload", dest="upload", action="store_false", - help="Do not upload the prepared experiments to comet.ml; will not create comet.ml projects", + help="Do not upload the prepared experiments to comet.ml;" + " will not create comet.ml projects", ) parser.set_defaults(upload=True) parser.add_argument( "--api-key", - help="Set the Comet API key; required with --upload (the default); can also be configured in the usual places", + help="Set the Comet API key; required with --upload (the default);" + " can also be configured in the usual places", ) parser.add_argument( "--mlflow-store-uri", - help="Set the MLFlow store uri. The MLFlow store uri to used to retrieve MLFlow runs, given directly to MLFlow, and supports all MLFlow schemes (file:// or SQLAlchemy-compatible database connection strings). If not set, reads MLFLOW_TRACKING_URI environment variable", + help="Set the MLFlow store uri. The MLFlow store uri to used to retrieve" + " MLFlow runs, given directly to MLFlow, and supports all MLFlow schemes" + " (file:// or SQLAlchemy-compatible database connection strings)." + " If not set, reads MLFLOW_TRACKING_URI environment variable", ) parser.add_argument( @@ -83,7 +88,8 @@ def main(): help="Answer all yes/no questions automatically with 'no'", ) parser.add_argument( - "--email", help="Set email address if needed for creating a comet.ml account", + "--email", + help="Set email address if needed for creating a comet.ml account", ) args = parser.parse_args() diff --git a/comet_for_mlflow/file_writer.py b/comet_for_mlflow/file_writer.py index 0772e80..10a8871 100644 --- a/comet_for_mlflow/file_writer.py +++ b/comet_for_mlflow/file_writer.py @@ -35,14 +35,12 @@ def generate_guid(): - """ Generate a GUID - """ + """Generate a GUID""" return uuid.uuid4().hex class JsonLinesFile(object): - """ A context manager to write a JSON Lines file, also called newline-delimited JSON. - """ + """A context manager to write a JSON Lines file, also called newline-delimited JSON.""" def __init__(self, filepath, tmpdir): self.filepath = filepath diff --git a/tests/test_comet_for_mlflow.py b/tests/test_comet_for_mlflow.py index 300e2fd..e22e054 100644 --- a/tests/test_comet_for_mlflow.py +++ b/tests/test_comet_for_mlflow.py @@ -60,7 +60,10 @@ def test_conversion(tmp_path, monkeypatch): url = url_join(SERVER_ADDRESS, "isAlive/ver") responses.add( - responses.GET, url, json=backend_version_body, status=200, + responses.GET, + url, + json=backend_version_body, + status=200, ) # Check that comet_for_mlflow have created an offline experiment From efc1a007088c1abfa6e3f194c6ed3eb9edf15490 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 8 Dec 2022 19:03:13 +0100 Subject: [PATCH 8/8] Update packaging action --- .github/workflows/package.yml | 31 +++++++++++++++++++++++++++++++ .github/workflows/tests.yml | 26 -------------------------- 2 files changed, 31 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/package.yml diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml new file mode 100644 index 0000000..4ac86c3 --- /dev/null +++ b/.github/workflows/package.yml @@ -0,0 +1,31 @@ +name: package + +on: + push: + release: + types: [published] + + +jobs: + build-package: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: 3.9 + - name: Build pip package + run: | + pip install -U pip build + python3 -m build --sdist --wheel --outdir dist/ . + - name: Archive Pypi artifacts + uses: actions/upload-artifact@v3 + with: + name: pypi_dist + path: dist + - name: Publish package + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TWINE_PROD_PASSWORD }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f25ea4c..0f97a06 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -43,29 +43,3 @@ jobs: pip install pre-commit - name: Run pre-commit run: pre-commit run --all-files - package: - runs-on: ubuntu-18.04 - needs: [test, lint] - steps: - - uses: actions/checkout@v1 - - name: Set up Python 3.7 - uses: actions/setup-python@v1 - with: - python-version: 3.7 - - name: Package and upload to Pypi Test - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.TWINE_TEST_PASSWORD }} - run: | - python -m pip install --upgrade pip - pip install -U wheel twine - python setup.py bdist_wheel sdist - ls dist/ - twine upload --repository-url https://test.pypi.org/legacy/ --skip-existing dist/* - - name: Upload to Pypi production if it's a new tag - if: contains(github.ref, 'tags') - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.TWINE_PROD_PASSWORD }} - run: | - twine upload dist/*