Skip to content

Commit

Permalink
feat: download artifacts from Job ID
Browse files Browse the repository at this point in the history
  • Loading branch information
datnguye committed Jan 20, 2024
1 parent 00e541a commit 6e35184
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 8 deletions.
12 changes: 9 additions & 3 deletions dbterd/adapters/dbt_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,16 @@ class DbtCloudArtifact:
"""

def __init__(self, **kwargs) -> None:
"""Initialize the base attributes to interact with API service"""
"""
Initialize the base attributes to interact with API service
Pass JOB_ID to get the artifacts of the job's latest run; to target a specific run, pass RUN_ID.
RUN_ID takes precedence if both are specified.
"""
self.host_url = kwargs.get("dbt_cloud_host_url")
self.service_token = kwargs.get("dbt_cloud_service_token")
self.account_id = kwargs.get("dbt_cloud_account_id")
self.job_id = kwargs.get("dbt_cloud_job_id")
self.run_id = kwargs.get("dbt_cloud_run_id")
self.api_version = kwargs.get("dbt_cloud_api_version")

Expand All @@ -35,13 +41,13 @@ def api_endpoint(self) -> dict:
return (
"https://{host_url}/api/{api_version}/"
"accounts/{account_id}/"
"runs/{run_id}/"
"{artifact_id}/"
"artifacts/{{path}}"
).format(
host_url=self.host_url,
api_version=self.api_version,
account_id=self.account_id,
run_id=self.run_id,
artifact_id=f"runs/{self.run_id}" if self.run_id else f"jobs/{self.job_id}",
)

@property
Expand Down
9 changes: 9 additions & 0 deletions dbterd/cli/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ def common_params(func):
default=os.environ.get("DBTERD_DBT_CLOUD_RUN_ID"),
show_default=True,
)
@click.option(
"--dbt-cloud-job-id",
help=(
"Configure dbt Cloud's Job ID. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_JOB_ID) if not specified"
),
default=os.environ.get("DBTERD_DBT_CLOUD_JOB_ID"),
show_default=True,
)
@click.option(
"--dbt-cloud-service-token",
help=(
Expand Down
14 changes: 12 additions & 2 deletions docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ export DBTERD_DBT_CLOUD_HOST_URL=your_value # optional, default = cloud.getdbt.c
export DBTERD_DBT_CLOUD_API_VERSION=your_value # optional, default = v2
```

Or in PowerShell:

```powershell
$env:DBTERD_DBT_CLOUD_SERVICE_TOKEN="your_value"
$env:DBTERD_DBT_CLOUD_ACCOUNT_ID="your_value"
$env:DBTERD_DBT_CLOUD_RUN_ID="your_value"
$env:DBTERD_DBT_CLOUD_HOST_URL="your_value" # optional, default = cloud.getdbt.com
$env:DBTERD_DBT_CLOUD_API_VERSION="your_value" # optional, default = v2
```

## 2. Generate ERD file

We're going to use `--dbt-cloud` option to tell `dbterd` to use dbt Cloud API with all above variables.
Expand All @@ -73,9 +83,9 @@ and then, here is the sample console log:
```log
dbterd - INFO - Run with dbterd==1.0.0 (main.py:54)
dbterd - INFO - Using dbt project dir at: C:\Sources\dbterd (base.py:46)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/manifest.json] (dbt_cloud.py:68)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/hidden/artifacts/manifest.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/catalog.json] (dbt_cloud.py:68)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/hidden/artifacts/catalog.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Using dbt artifact dir at: hidden (base.py:73)
dbterd - INFO - Collected 4 table(s) and 3 relationship(s) (test_relationship.py:59)
Expand Down
50 changes: 50 additions & 0 deletions docs/nav/guide/dbt-cloud/download-artifact-from-a-job.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Download the latest artifacts from a Job

Because the Run ID changes with every run, it is often more convenient to tell `dbterd` to retrieve the latest artifacts from a Job (i.e. from its latest run) by using the [Retrieve Job Artifact](https://docs.getdbt.com/dbt-cloud/api-v2#/operations/Retrieve%20Job%20Artifact) endpoint.

The _Prerequisites_ and _Steps_ are nearly the same as in [Download artifacts from a Job Run](./download-artifact-from-a-job-run.md); the only difference is that we specify the **JOB ID** instead of the _JOB RUN ID_.

!!! NOTE
    _JOB RUN ID_ takes precedence over _JOB ID_ if both are specified.

Our dbt Cloud's Job will have the URL constructed as:

```log
https://<host_url>/deploy/<account_id>/projects/irrelevant/jobs/<job_id>
```

In the above:

| URL Part | Environment Variable | CLI Option | Description |
|-------------------|---------------------------------|---------------------------|---------------------------------------------------------------------------|
| `job_id` | `DBTERD_DBT_CLOUD_JOB_ID` | `--dbt-cloud-job-id` | dbt Cloud job ID |

- Fill in `your_value` and set the environment variables as below:

```bash
export DBTERD_DBT_CLOUD_SERVICE_TOKEN=your_value
export DBTERD_DBT_CLOUD_ACCOUNT_ID=your_value
export DBTERD_DBT_CLOUD_JOB_ID=your_value
export DBTERD_DBT_CLOUD_HOST_URL=your_value # optional, default = cloud.getdbt.com
export DBTERD_DBT_CLOUD_API_VERSION=your_value # optional, default = v2
```

- Generate ERD:

```bash
dbterd run --dbt-cloud [-s <dbterd selection>]
```

And the sample logs:

```log
dbterd - INFO - Run with dbterd==1.0.0 (main.py:54)
dbterd - INFO - Using dbt project dir at: C:\Sources\dbterd (base.py:46)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/jobs/hidden/artifacts/manifest.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/jobs/hidden/artifacts/catalog.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Using dbt artifact dir at: hidden (base.py:73)
dbterd - INFO - Collected 4 table(s) and 3 relationship(s) (test_relationship.py:59)
dbterd - INFO - C:\Sources\dbterd\target/output.dbml (base.py:170)
```
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ nav:
- Relationship Types: nav/metadata/relationship_type.md
- dbt Cloud:
- Download artifacts from a Job Run: nav/guide/dbt-cloud/download-artifact-from-a-job-run.md
- Download the latest artifacts from a Job: nav/guide/dbt-cloud/download-artifact-from-a-job.md
- Contribution Guideline ❤️: nav/development/contributing-guide.md
- License: license.md
- Change Log ↗️: https://github.com/datnguye/dbterd/releases" target="_blank
Expand Down
29 changes: 26 additions & 3 deletions tests/unit/adapters/test_dbt_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ def dbtCloudArtifact(self) -> DbtCloudArtifact:
"kwargs, expected",
[
(
dict(),
dict(dbt_cloud_run_id="run_id"),
dict(
host_url=None,
service_token=None,
account_id=None,
run_id=None,
run_id="run_id",
job_id=None,
api_version=None,
),
),
Expand All @@ -53,12 +54,13 @@ def dbtCloudArtifact(self) -> DbtCloudArtifact:
service_token="service_token",
account_id="account_id",
run_id="run_id",
job_id=None,
api_version="api_version",
),
),
],
)
def test_init(self, kwargs, expected):
def test_init_run(self, kwargs, expected):
dbt_cloud = DbtCloudArtifact(**kwargs)
assert vars(dbt_cloud) == expected
assert dbt_cloud.request_headers == {
Expand All @@ -83,6 +85,27 @@ def test_init(self, kwargs, expected):
"artifacts/catalog.json"
).format(**expected)

@pytest.mark.parametrize(
"kwargs, endpoint",
[
(
dict(dbt_cloud_run_id="run_id"),
"https://None/api/None/accounts/None/runs/run_id/artifacts/{path}",
),
(
dict(dbt_cloud_run_id="run_id", dbt_cloud_job_id="job_id"),
"https://None/api/None/accounts/None/runs/run_id/artifacts/{path}",
),
(
dict(dbt_cloud_job_id="job_id"),
"https://None/api/None/accounts/None/jobs/job_id/artifacts/{path}",
),
],
)
def test_api_endpoint(self, kwargs, endpoint):
dbt_cloud = DbtCloudArtifact(**kwargs)
assert dbt_cloud.api_endpoint == endpoint

@mock.patch("dbterd.adapters.dbt_cloud.file.write_json")
@mock.patch("dbterd.adapters.dbt_cloud.requests.get")
def test_download_artifact_ok(
Expand Down

0 comments on commit 6e35184

Please sign in to comment.