# Nightly Containers on CUDA 12.1 (JAX pinned) (schedule) #37
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow title as listed in the Actions UI.
name: 'Nightly Containers on CUDA 12.1 (JAX pinned)'

# Title of each individual run: the workflow title followed by, in
# parentheses, either "nightly <created_at>" for `workflow_run` events or the
# name of the triggering event.
# NOTE(review): the triggers visible in this file are `schedule` and
# `workflow_dispatch` only, so the `workflow_run` branch looks unreachable
# here — confirm before relying on it.
run-name: >-
  Nightly Containers on CUDA 12.1 (JAX pinned)
  (${{ github.event_name == 'workflow_run'
  && format('nightly {0}', github.event.workflow_run.created_at)
  || github.event_name }})
on:
  # Nightly build at 09:30 UTC, i.e. 01:30 PST / 02:30 PDT in Pacific Time.
  schedule:
    - cron: '30 9 * * *'

  # Manual trigger. Each input carries a default, listed in the order
  # type / description / required / default.
  workflow_dispatch:
    inputs:
      # Image that the T5X and PAX builds are layered on and that test-jax runs.
      JAX_BASE_IMAGE:
        type: string
        description: 'Base Multiarch JAX Image'
        required: true
        default: 'ghcr.io/nvidia/jax-toolbox-internal:6473019396-jax-multiarch'
      REPO_T5X:
        type: string
        description: 'URL of T5X repository to check out'
        required: false
        default: 'https://github.com/nvjax-svc-0/t5x.git'
      REF_T5X:
        type: string
        description: 'Git commit, tag, or branch for T5X'
        required: false
        default: 'unpin-tfds-gpu-extra'
      REPO_TE:
        type: string
        description: 'URL of TE repository to check out'
        required: false
        default: 'https://github.com/NVIDIA/TransformerEngine.git'
      REF_TE:
        type: string
        description: 'Git commit, tag, or branch for TE'
        required: false
        default: 'v0.13'
      # NOTE(review): nothing visible in this workflow reads `inputs.PUBLISH`
      # — confirm whether it is consumed elsewhere or can be dropped.
      PUBLISH:
        type: boolean
        description: "Publish dated images and update the 'latest' tag?"
        required: false
        default: false
# Runs are grouped by workflow name plus `github.head_ref`; when `head_ref`
# is empty the run id is used instead, which gives the run a group of its own.
# A newer run cancels an in-progress one in the same group unless the ref is
# `refs/heads/main`.
concurrency:
  group: "${{ github.workflow }}-${{ github.head_ref || github.run_id }}"
  cancel-in-progress: "${{ github.ref != 'refs/heads/main' }}"
# Scopes granted to the GITHUB_TOKEN for every job in this workflow.
permissions:
  # to fetch code
  contents: read
  # to cancel previous workflows
  actions: write
  # to upload container
  packages: write
# Fallbacks the `metadata` job uses whenever the matching workflow_dispatch
# input is empty. The values currently equal the `default:` of each dispatch
# input; keep the two lists in sync when changing either.
env:
  DEFAULT_JAX_BASE_IMAGE: 'ghcr.io/nvidia/jax-toolbox-internal:6473019396-jax-multiarch'
  DEFAULT_REPO_T5X: 'https://github.com/nvjax-svc-0/t5x.git'
  DEFAULT_REF_T5X: 'unpin-tfds-gpu-extra'
  DEFAULT_REPO_TE: 'https://github.com/NVIDIA/TransformerEngine.git'
  DEFAULT_REF_TE: 'v0.13'
jobs:
  # Resolves the build date and the effective image / repository / ref
  # parameters, and publishes them as job outputs for the jobs below. An empty
  # input falls back to the corresponding DEFAULT_* workflow-level env value.
  metadata:
    runs-on: ubuntu-22.04
    outputs:
      BUILD_DATE: ${{ steps.meta.outputs.BUILD_DATE }}
      JAX_BASE_IMAGE: ${{ steps.meta.outputs.JAX_BASE_IMAGE }}
      REPO_T5X: ${{ steps.meta.outputs.REPO_T5X }}
      REF_T5X: ${{ steps.meta.outputs.REF_T5X }}
      REPO_TE: ${{ steps.meta.outputs.REPO_TE }}
      REF_TE: ${{ steps.meta.outputs.REF_TE }}
    steps:
      - name: Set build date and base image
        id: meta
        shell: bash -x -e {0}
        # Inputs reach the script as environment variables rather than being
        # spliced into the script text, so a value containing quotes or shell
        # metacharacters cannot change which commands run.
        env:
          REQ_JAX_BASE_IMAGE: ${{ inputs.JAX_BASE_IMAGE }}
          REQ_REPO_T5X: ${{ inputs.REPO_T5X }}
          REQ_REF_T5X: ${{ inputs.REF_T5X }}
          REQ_REPO_TE: ${{ inputs.REPO_TE }}
          REQ_REF_TE: ${{ inputs.REF_TE }}
        run: |
          # America/Los_Angeles is the canonical IANA name of the zone that
          # the legacy alias US/Los_Angeles points to.
          BUILD_DATE=$(TZ='America/Los_Angeles' date '+%Y-%m-%d')
          # ${VAR:-fallback} picks the fallback when VAR is unset or empty;
          # the DEFAULT_* variables come from the workflow-level `env` block.
          {
            echo "BUILD_DATE=${BUILD_DATE}"
            echo "JAX_BASE_IMAGE=${REQ_JAX_BASE_IMAGE:-${DEFAULT_JAX_BASE_IMAGE}}"
            echo "REPO_T5X=${REQ_REPO_T5X:-${DEFAULT_REPO_T5X}}"
            echo "REF_T5X=${REQ_REF_T5X:-${DEFAULT_REF_T5X}}"
            echo "REPO_TE=${REQ_REPO_TE:-${DEFAULT_REPO_TE}}"
            echo "REF_TE=${REQ_REF_TE:-${DEFAULT_REF_TE}}"
          } >> "$GITHUB_OUTPUT"

  # Calls the reusable PAX build workflow with the resolved JAX base image.
  build-pax:
    needs: [metadata]
    uses: ./.github/workflows/_build_pax.yaml
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      BASE_IMAGE: ${{ needs.metadata.outputs.JAX_BASE_IMAGE }}
    secrets: inherit

  # Calls the reusable Rosetta build workflow, passing build-pax's DOCKER_TAGS
  # output as the base image (amd64 only).
  build-rosetta-pax:
    uses: ./.github/workflows/_build_rosetta.yaml
    needs: [metadata, build-pax]
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      BASE_IMAGE: ${{ needs.build-pax.outputs.DOCKER_TAGS }}
      BASE_LIBRARY: pax
      PLATFORMS: '["amd64"]'
    secrets: inherit

  # Calls the reusable T5X build workflow with the resolved JAX base image and
  # the resolved T5X / TE repositories and refs.
  build-t5x:
    needs: [metadata]
    uses: ./.github/workflows/_build_t5x.yaml
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      BASE_IMAGE: ${{ needs.metadata.outputs.JAX_BASE_IMAGE }}
      REPO_T5X: ${{ needs.metadata.outputs.REPO_T5X }}
      REF_T5X: ${{ needs.metadata.outputs.REF_T5X }}
      REPO_TE: ${{ needs.metadata.outputs.REPO_TE }}
      REF_TE: ${{ needs.metadata.outputs.REF_TE }}
    secrets: inherit

  # Calls the reusable Rosetta build workflow, passing build-t5x's DOCKER_TAGS
  # output as the base image (amd64 only).
  build-rosetta-t5x:
    uses: ./.github/workflows/_build_rosetta.yaml
    needs: [metadata, build-t5x]
    with:
      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
      BASE_IMAGE: ${{ needs.build-t5x.outputs.DOCKER_TAGS }}
      BASE_LIBRARY: t5x
      PLATFORMS: '["amd64"]'
    secrets: inherit

  # Writes a Markdown table of the input image and the built image tags to the
  # run's step summary. `if: always()` makes it run even when a build failed.
  build-summary:
    needs:
      - metadata
      - build-t5x
      - build-rosetta-t5x
      - build-pax
      - build-rosetta-pax
    if: always()
    runs-on: ubuntu-22.04
    steps:
      - name: Generate job summary for container build
        shell: bash -x -e {0}
        # The heredoc delimiter is quoted so that bash copies the table
        # verbatim: image tags substituted by the expressions below are never
        # subject to `$`, backtick or backslash expansion.
        run: |
          cat > "$GITHUB_STEP_SUMMARY" << 'EOF'
          # Images created

          | Image        | Link                                               |
          | ------------ | -------------------------------------------------- |
          | JAX (input)  | ${{ needs.metadata.outputs.JAX_BASE_IMAGE }} |
          | T5X          | ${{ needs.build-t5x.outputs.DOCKER_TAGS }} |
          | ROSETTA(T5X) | ${{ needs.build-rosetta-t5x.outputs.DOCKER_TAGS }} |
          | PAX          | ${{ needs.build-pax.outputs.DOCKER_TAGS }} |
          | ROSETTA(pax) | ${{ needs.build-rosetta-pax.outputs.DOCKER_TAGS }} |
          EOF

  # Runs the reusable JAX test workflow against the resolved JAX base image.
  test-jax:
    needs: metadata
    uses: ./.github/workflows/_test_jax.yaml
    with:
      JAX_IMAGE: ${{ needs.metadata.outputs.JAX_BASE_IMAGE }}
    secrets: inherit

  # Runs the reusable PAX test workflow against the image built by build-pax.
  test-pax:
    needs: build-pax
    uses: ./.github/workflows/_test_pax.yaml
    with:
      PAX_IMAGE: ${{ needs.build-pax.outputs.DOCKER_TAGS }}
    secrets: inherit

  # Runs the reusable T5X test workflow against the image built by build-t5x.
  test-t5x:
    needs: build-t5x
    uses: ./.github/workflows/_test_t5x.yaml
    with:
      T5X_IMAGE: ${{ needs.build-t5x.outputs.DOCKER_TAGS }}
    secrets: inherit

  # TODO(terry): This is missing the rosetta tests which can only be added
  # after a fix for the TB log collision is pushed.
  #
  # Final step of the run. `if: always()` makes it run regardless of upstream
  # failures. Every test job defined above is listed in `needs` (including
  # test-t5x) so that finalize cannot start while a test job is still running.
  # NOTE(review): what _finalize.yaml does with the test results is not
  # visible from this file — confirm it copes with test-t5x being present.
  finalize:
    if: always()
    # TODO: use dynamic matrix to make dependencies self-updating
    needs: [build-summary, test-jax, test-pax, test-t5x]
    uses: ./.github/workflows/_finalize.yaml
    with:
      PUBLISH_BADGE: false
    secrets: inherit