From d322cb6b98eae83dbb0e5c5b3759b29ec92d87f1 Mon Sep 17 00:00:00 2001 From: Adrian Kreuziger Date: Wed, 20 Mar 2024 15:27:30 -0700 Subject: [PATCH] Publish PySpark data assets --- .../publish-contracts-and-assets.yml | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish-contracts-and-assets.yml b/.github/workflows/publish-contracts-and-assets.yml index 242bf8c1..fe6b4e44 100644 --- a/.github/workflows/publish-contracts-and-assets.yml +++ b/.github/workflows/publish-contracts-and-assets.yml @@ -5,6 +5,7 @@ on: push: branches: - 'main' + - 'pyspark_cicd' permissions: # Required to checkout the contracts from the repo contents: read @@ -81,12 +82,33 @@ jobs: run: curl -sSL https://install.python-poetry.org | python3 - shell: bash - name: Install dependencies - run: poetry install shell: bash + run: | + # Install root level dependencies + poetry install + # Create a virtual environment for the PySpark project, install dependencies + python3 -m venv "pyspark/.venv" + pyspark/.venv/bin/pip install -r pyspark/requirements.txt + - name: Run migrations from db_migrations directory working-directory: ./db_migrations run: poetry run alembic upgrade head shell: bash + + - name: Register PySpark Data Assets + uses: gabledata/cicd/github-actions/register-data-assets@python_path + with: + # Provide API key and endpoint secrets + gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}} + gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}} + python-path: pyspark/.venv/bin/python + # Options for registering the PySpark data assets, given as a multiline string: 
+ # the project root, the Spark job entrypoint with its arguments, and the CSV + # schema file. NOTE(review): how the action consumes these options is not shown here - confirm + data-asset-options: | + --project-root pyspark \ + --spark-job-entrypoint "job.py --final_output_table pnw_bookings_30_days" \ + --csv-schema-file pyspark/schemas.csv - name: Register Protobuf Data Assets uses: gabledata/cicd/github-actions/register-data-assets@latest with: