Skip to content

Commit

Permalink
Publish PySpark data assets
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianisk committed Mar 20, 2024
1 parent 5983f93 commit d322cb6
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion .github/workflows/publish-contracts-and-assets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
push:
branches:
- 'main'
- 'pyspark_cicd'
permissions:
# Required to checkout the contracts from the repo
contents: read
Expand Down Expand Up @@ -81,12 +82,33 @@ jobs:
run: curl -sSL https://install.python-poetry.org | python3 -
shell: bash
- name: Install dependencies
run: poetry install
shell: bash
run: |
# Install root level dependencies
poetry install
# Create a virtual environment for the PySpark project, install dependencies
python3 -m venv "pyspark/.venv"
pyspark/.venv/bin/pip install -r pyspark/requirements.txt
- name: Run migrations from db_migrations directory
working-directory: ./db_migrations
run: poetry run alembic upgrade head
shell: bash

- name: Register PySpark Data Assets
uses: gabledata/cicd/github-actions/register-data-assets@python_path
with:
# Provide API key and endpoint secrets
gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
python-path: pyspark/.venv/bin/python
# Options passed to the register-data-assets action for PySpark asset
# discovery: the project root, the Spark job entrypoint (with its arguments),
# and a CSV schema file. Can be given as a space separated list or a
# multiline string.
data-asset-options: |
--project-root pyspark \
--spark-job-entrypoint "job.py --final_output_table pnw_bookings_30_days" \
--csv-schema-file pyspark/schemas.csv
- name: Register Protobuf Data Assets
uses: gabledata/cicd/github-actions/register-data-assets@latest
with:
Expand Down

0 comments on commit d322cb6

Please sign in to comment.