From 6e351843ee86db5a9b815aabedc1d81a647fa65e Mon Sep 17 00:00:00 2001 From: Dat Nguyen Date: Sat, 20 Jan 2024 09:51:53 +0700 Subject: [PATCH] feat: download artifacts from Job ID --- dbterd/adapters/dbt_cloud.py | 12 +++-- dbterd/cli/params.py | 9 ++++ .../download-artifact-from-a-job-run.md | 14 +++++- .../dbt-cloud/download-artifact-from-a-job.md | 50 +++++++++++++++++++ mkdocs.yml | 1 + tests/unit/adapters/test_dbt_cloud.py | 29 +++++++++-- 6 files changed, 107 insertions(+), 8 deletions(-) create mode 100644 docs/nav/guide/dbt-cloud/download-artifact-from-a-job.md diff --git a/dbterd/adapters/dbt_cloud.py b/dbterd/adapters/dbt_cloud.py index 51512fd..7d7d57b 100644 --- a/dbterd/adapters/dbt_cloud.py +++ b/dbterd/adapters/dbt_cloud.py @@ -17,10 +17,16 @@ class DbtCloudArtifact: """ def __init__(self, **kwargs) -> None: - """Initialize the base attributes to interact with API service""" + """ + Initialize the base attributes to interact with API service + + Passing JOB_ID to get the latest run's artifacts. In particular to a run, let's use RUN_ID. 
+ RUN_ID will take the precedence if specified + """ self.host_url = kwargs.get("dbt_cloud_host_url") self.service_token = kwargs.get("dbt_cloud_service_token") self.account_id = kwargs.get("dbt_cloud_account_id") + self.job_id = kwargs.get("dbt_cloud_job_id") self.run_id = kwargs.get("dbt_cloud_run_id") self.api_version = kwargs.get("dbt_cloud_api_version") @@ -35,13 +41,13 @@ def api_endpoint(self) -> dict: return ( "https://{host_url}/api/{api_version}/" "accounts/{account_id}/" - "runs/{run_id}/" + "{artifact_id}/" "artifacts/{{path}}" ).format( host_url=self.host_url, api_version=self.api_version, account_id=self.account_id, - run_id=self.run_id, + artifact_id=f"runs/{self.run_id}" if self.run_id else f"jobs/{self.job_id}", ) @property diff --git a/dbterd/cli/params.py b/dbterd/cli/params.py index c9a5bfe..21a3c4a 100644 --- a/dbterd/cli/params.py +++ b/dbterd/cli/params.py @@ -152,6 +152,15 @@ def common_params(func): default=os.environ.get("DBTERD_DBT_CLOUD_RUN_ID"), show_default=True, ) + @click.option( + "--dbt-cloud-job-id", + help=( + "Configure dbt Cloud's Job ID. 
" + "Try to get OS environment variable (DBTERD_DBT_CLOUD_JOB_ID) if not specified" + ), + default=os.environ.get("DBTERD_DBT_CLOUD_JOB_ID"), + show_default=True, + ) @click.option( "--dbt-cloud-service-token", help=( diff --git a/docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md b/docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md index a5a378d..8efca4c 100644 --- a/docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md +++ b/docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md @@ -55,6 +55,16 @@ export DBTERD_DBT_CLOUD_HOST_URL=your_value # optional, default = cloud.getdbt.c export DBTERD_DBT_CLOUD_API_VERSION=your_value # optional, default = v2 ``` +Or in Powershell: + +```bash +$env:DBTERD_DBT_CLOUD_SERVICE_TOKEN="your_value" +$env:DBTERD_DBT_CLOUD_ACCOUNT_ID="your_value" +$env:DBTERD_DBT_CLOUD_RUN_ID="your_value" +$env:DBTERD_DBT_CLOUD_HOST_URL="your_value" # optional, default = cloud.getdbt.com +$env:DBTERD_DBT_CLOUD_API_VERSION="your_value" # optional, default = v2 +``` + ## 2. Genrate ERD file We're going to use `--dbt-cloud` option to tell `dbterd` to use dbt Cloud API with all above variables. 
@@ -73,9 +83,9 @@ and then, here is the sample console log: ```log dbterd - INFO - Run with dbterd==1.0.0 (main.py:54) dbterd - INFO - Using dbt project dir at: C:\Sources\dbterd (base.py:46) -dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/manifest.json] (dbt_cloud.py:68) +dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/hidden/artifacts/manifest.json] (dbt_cloud.py:68) dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71) -dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/catalog.json] (dbt_cloud.py:68) +dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/hidden/artifacts/catalog.json] (dbt_cloud.py:68) dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71) dbterd - INFO - Using dbt artifact dir at: hidden (base.py:73) dbterd - INFO - Collected 4 table(s) and 3 relationship(s) (test_relationship.py:59) diff --git a/docs/nav/guide/dbt-cloud/download-artifact-from-a-job.md b/docs/nav/guide/dbt-cloud/download-artifact-from-a-job.md new file mode 100644 index 0000000..02c81e7 --- /dev/null +++ b/docs/nav/guide/dbt-cloud/download-artifact-from-a-job.md @@ -0,0 +1,50 @@ +# Download the latest artifacts from a Job + +Rather than tracking the Run ID, which changes with every run, we can tell `dbterd` to retrieve the latest artifacts from a Job (latest run) by using the [Retrieve Job Artifact](https://docs.getdbt.com/dbt-cloud/api-v2#/operations/Retrieve%20Job%20Artifact) endpoint. + +The _Prerequisites_ and _Steps_ are very similar to [Download artifacts from a Job Run](./download-artifact-from-a-job-run.md); almost everything is the same, except that we specify **JOB ID** instead of the _JOB RUN ID_. + +!!! 
NOTE + _JOB RUN ID_ takes precedence over _JOB ID_ if specified + +Our dbt Cloud's Job will have the URL constructed as: + +```log +https://<host_url>/deploy/<account_id>/projects/irrelevant/jobs/<job_id> +``` + +In the above: + +| URL Part | Environment Variable | CLI Option | Description | +|-------------------|---------------------------------|---------------------------|---------------------------------------------------------------------------| +| `job_id` | `DBTERD_DBT_CLOUD_JOB_ID` | `--dbt-cloud-job-id` | dbt Cloud job ID | + +- Fill `your_value` and set the environment variable as below: + +```bash +export DBTERD_DBT_CLOUD_SERVICE_TOKEN=your_value +export DBTERD_DBT_CLOUD_ACCOUNT_ID=your_value +export DBTERD_DBT_CLOUD_JOB_ID=your_value +export DBTERD_DBT_CLOUD_HOST_URL=your_value # optional, default = cloud.getdbt.com +export DBTERD_DBT_CLOUD_API_VERSION=your_value # optional, default = v2 +``` + +- Generate ERD: + +```bash +dbterd run --dbt-cloud [-s <selection>] +``` + +And the sample logs: + +```log +dbterd - INFO - Run with dbterd==1.0.0 (main.py:54) +dbterd - INFO - Using dbt project dir at: C:\Sources\dbterd (base.py:46) +dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/jobs/hidden/artifacts/manifest.json] (dbt_cloud.py:68) +dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71) +dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/jobs/hidden/artifacts/catalog.json] (dbt_cloud.py:68) +dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71) +dbterd - INFO - Using dbt artifact dir at: hidden (base.py:73) +dbterd - INFO - Collected 4 table(s) and 3 relationship(s) (test_relationship.py:59) +dbterd - INFO - C:\Sources\dbterd\target/output.dbml (base.py:170) +``` diff --git a/mkdocs.yml b/mkdocs.yml index 3e4185f..1ac070e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - Relationship Types: nav/metadata/relationship_type.md - dbt Cloud: - Download artifacts from a Job Run: nav/guide/dbt-cloud/download-artifact-from-a-job-run.md + - 
Download the latest artifacts from a Job: nav/guide/dbt-cloud/download-artifact-from-a-job.md - Contribution Guideline ❤️: nav/development/contributing-guide.md - License: license.md - Change Log ↗️: https://github.com/datnguye/dbterd/releases" target="_blank diff --git a/tests/unit/adapters/test_dbt_cloud.py b/tests/unit/adapters/test_dbt_cloud.py index 7b903d0..5622548 100644 --- a/tests/unit/adapters/test_dbt_cloud.py +++ b/tests/unit/adapters/test_dbt_cloud.py @@ -31,12 +31,13 @@ def dbtCloudArtifact(self) -> DbtCloudArtifact: "kwargs, expected", [ ( - dict(), + dict(dbt_cloud_run_id="run_id"), dict( host_url=None, service_token=None, account_id=None, - run_id=None, + run_id="run_id", + job_id=None, api_version=None, ), ), @@ -53,12 +54,13 @@ def dbtCloudArtifact(self) -> DbtCloudArtifact: service_token="service_token", account_id="account_id", run_id="run_id", + job_id=None, api_version="api_version", ), ), ], ) - def test_init(self, kwargs, expected): + def test_init_run(self, kwargs, expected): dbt_cloud = DbtCloudArtifact(**kwargs) assert vars(dbt_cloud) == expected assert dbt_cloud.request_headers == { @@ -83,6 +85,27 @@ def test_init(self, kwargs, expected): "artifacts/catalog.json" ).format(**expected) + @pytest.mark.parametrize( + "kwargs, endpoint", + [ + ( + dict(dbt_cloud_run_id="run_id"), + "https://None/api/None/accounts/None/runs/run_id/artifacts/{path}", + ), + ( + dict(dbt_cloud_run_id="run_id", dbt_cloud_job_id="job_id"), + "https://None/api/None/accounts/None/runs/run_id/artifacts/{path}", + ), + ( + dict(dbt_cloud_job_id="job_id"), + "https://None/api/None/accounts/None/jobs/job_id/artifacts/{path}", + ), + ], + ) + def test_api_endpoint(self, kwargs, endpoint): + dbt_cloud = DbtCloudArtifact(**kwargs) + assert dbt_cloud.api_endpoint == endpoint + @mock.patch("dbterd.adapters.dbt_cloud.file.write_json") @mock.patch("dbterd.adapters.dbt_cloud.requests.get") def test_download_artifact_ok(