diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 00000000..a6c7860f
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,176 @@
+name: Build and push docker image to registry
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - 'main'
+    tags:
+      - 'v*'
+  pull_request:
+    paths:
+      - ".github/workflows/build.yaml"
+      - ".github/workflows/matrix.json"
+      - "integration-tests/**"
+      - "backends/**"
+      - "core/**"
+      - "router/**"
+      - "Cargo.lock"
+      - "rust-toolchain.toml"
+      - "Dockerfile"
+    branches:
+      - 'main'
+
+jobs:
+  # Reads .github/workflows/matrix.json and keeps the entries whose `runOn`
+  # equals the current branch name or is "always"; the result drives the
+  # build matrix of the job below.
+  matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - id: set-matrix
+        run: |
+          # Use the runner-provided GITHUB_REF instead of interpolating
+          # github.ref into the script text, so a crafted ref name cannot
+          # inject shell code.
+          branchName="${GITHUB_REF#refs/heads/}"
+          # -c emits the filtered array on one line, as step outputs require.
+          matrix=$(jq -c --arg branchName "$branchName" 'map(. | select((.runOn==$branchName) or (.runOn=="always")) )' .github/workflows/matrix.json)
+          echo "{\"include\":${matrix}}"
+          # The ::set-output workflow command is deprecated; write to GITHUB_OUTPUT.
+          echo "matrix={\"include\":${matrix}}" >> "$GITHUB_OUTPUT"
+
+  # Builds one image set per matrix entry; images are pushed only outside PRs.
+  build-and-push-image:
+    needs: matrix
+    strategy:
+      matrix: ${{fromJson(needs.matrix.outputs.matrix)}}
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.job }}-${{matrix.name}}-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    runs-on:
+      group: aws-use1-r7i-8xlarge-priv
+    permissions:
+      contents: write
+      packages: write
+      # This is used to complete the identity challenge
+      # with sigstore/fulcio when running outside of PRs.
+      id-token: write
+      security-events: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Initialize Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          install: true
+          buildkitd-config-inline: |
+            [registry."docker.io"]
+              mirrors = ["registry-us-east-1.prod.aws.ci.huggingface.tech"]
+
+      - name: Configure sccache
+        uses: actions/github-script@v6
+        with:
+          script: |
+            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
+            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+
+      - name: Login to internal Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+          registry: ${{ secrets.REGISTRY_URL }}
+
+      - name: Login to GitHub Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ secrets.REGISTRY_URL }}/api-inference/text-embeddings-inference
+            ghcr.io/huggingface/text-embeddings-inference
+          flavor: |
+            latest=false
+          tags: |
+            type=semver,pattern=${{ matrix.imageNamePrefix }}{{version}}
+            type=semver,pattern=${{ matrix.imageNamePrefix }}{{major}}.{{minor}}
+            type=raw,value=${{ matrix.imageNamePrefix }}latest
+            type=raw,value=${{ matrix.imageNamePrefix }}sha-${{ env.GITHUB_SHA_SHORT }}
+
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ${{ matrix.dockerfile }}
+          push: ${{ github.event_name != 'pull_request' }}
+          platforms: 'linux/amd64'
+          build-args: |
+            SCCACHE_GHA_ENABLED=${{ matrix.sccache }}
+            ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
+            ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
+            CUDA_COMPUTE_CAP=${{ matrix.cudaComputeCap }}
+            GIT_SHA=${{ env.GITHUB_SHA }}
+            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
+            ${{matrix.extraBuildArgs}}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
+          cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
+
+      - name: Extract metadata (tags, labels) for Docker (gRPC)
+        id: meta-grpc
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ secrets.REGISTRY_URL }}/api-inference/text-embeddings-inference
+            ghcr.io/huggingface/text-embeddings-inference
+          flavor: |
+            latest=false
+          tags: |
+            type=semver,pattern=${{ matrix.imageNamePrefix }}{{version}}-grpc
+            type=semver,pattern=${{ matrix.imageNamePrefix }}{{major}}.{{minor}}-grpc
+            type=raw,value=${{ matrix.imageNamePrefix }}latest-grpc
+            type=raw,value=${{ matrix.imageNamePrefix }}sha-${{ env.GITHUB_SHA_SHORT }}-grpc
+
+      # Build the `grpc` target from the same Dockerfile as the image above so
+      # the prefixed "-grpc" tags match their variant. NOTE(review): assumes every
+      # matrix entry's Dockerfile defines a `grpc` stage; confirm in matrix.json.
+      - name: Build and push Docker image (gRPC)
+        id: build-and-push-grpc
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          target: grpc
+          file: ${{ matrix.dockerfile }}
+          push: ${{ github.event_name != 'pull_request' }}
+          platforms: 'linux/amd64'
+          build-args: |
+            SCCACHE_GHA_ENABLED=${{ matrix.sccache }}
+            ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
+            ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
+            CUDA_COMPUTE_CAP=${{ matrix.cudaComputeCap }}
+            GIT_SHA=${{ env.GITHUB_SHA }}
+            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
+            ${{matrix.extraBuildArgs}}
+          tags: ${{ steps.meta-grpc.outputs.tags }}
+          labels: ${{ steps.meta-grpc.outputs.labels }}
+          cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
diff --git a/.github/workflows/build_75.yaml b/.github/workflows/build_75.yaml deleted file mode 100644 index 20416e14..00000000 --- a/.github/workflows/build_75.yaml +++ /dev/null @@ -1,124 +0,0 @@ - name: Build and push Cuda Turing docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-75-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs.
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta-75 - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=turing-{{version}} - type=semver,pattern=turing-{{major}}.{{minor}} - type=raw,value=turing-latest - type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push-75 - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=75 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - DEFAULT_USE_FLASH_ATTENTION=False - tags: ${{ steps.meta-75.outputs.tags }} - labels: ${{ steps.meta-75.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-75,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-75,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - - - name: Extract metadata (tags, labels) for Docker - id: meta-75-grpc - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=turing-{{version}}-grpc - type=semver,pattern=turing-{{major}}.{{minor}}-grpc - type=raw,value=turing-latest-grpc - type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }}-grpc - - - name: Build and push Docker image - id: build-and-push-75-grpc - uses: docker/build-push-action@v4 - with: - context: . 
- target: grpc - file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=75 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - DEFAULT_USE_FLASH_ATTENTION=False - tags: ${{ steps.meta-75-grpc.outputs.tags }} - labels: ${{ steps.meta-75-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-75,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/build_80.yaml b/.github/workflows/build_80.yaml deleted file mode 100644 index 7098bad8..00000000 --- a/.github/workflows/build_80.yaml +++ /dev/null @@ -1,134 +0,0 @@ - name: Build and push Cuda Ampere docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - pull_request: - paths: - - ".github/workflows/build.yaml" -# - "integration-tests/**" - - "backends/**" - - "core/**" - - "router/**" - - "Cargo.lock" - - "rust-toolchain.toml" - - "Dockerfile" - branches: - - 'main' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-80-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta-80 - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=raw,value=latest - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push-80 - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=80 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-80.outputs.tags }} - labels: ${{ steps.meta-80.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-80,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-80,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - - - name: Extract metadata (tags, labels) for Docker - id: meta-80-grpc - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern={{version}}-grpc - type=semver,pattern={{major}}.{{minor}}-grpc - type=raw,value=latest-grpc - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-grpc - - - name: Build and push Docker image - id: build-and-push-80-grpc - uses: docker/build-push-action@v4 - with: - context: . 
- target: grpc - file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=80 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-80-grpc.outputs.tags }} - labels: ${{ steps.meta-80-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-80,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/build_86.yaml b/.github/workflows/build_86.yaml deleted file mode 100644 index d0b6e5cb..00000000 --- a/.github/workflows/build_86.yaml +++ /dev/null @@ -1,122 +0,0 @@ - name: Build and push Cuda A10 docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-86-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta-86 - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=86-{{version}} - type=semver,pattern=86-{{major}}.{{minor}} - type=raw,value=86-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push-86 - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=86 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-86.outputs.tags }} - labels: ${{ steps.meta-86.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-86,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-86,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - - - name: Extract metadata (tags, labels) for Docker - id: meta-86-grpc - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=86-{{version}}-grpc - type=semver,pattern=86-{{major}}.{{minor}}-grpc - type=raw,value=86-latest-grpc - type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}-grpc - - - name: Build and push Docker image - id: build-and-push-86-grpc - uses: docker/build-push-action@v4 - with: - context: . 
- target: grpc - file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=86 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-86-grpc.outputs.tags }} - labels: ${{ steps.meta-86-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-86,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/build_89.yaml b/.github/workflows/build_89.yaml deleted file mode 100644 index 5a9e0752..00000000 --- a/.github/workflows/build_89.yaml +++ /dev/null @@ -1,122 +0,0 @@ - name: Build and push Cuda RTX 4000 series docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-89-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta-89 - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=89-{{version}} - type=semver,pattern=89-{{major}}.{{minor}} - type=raw,value=89-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=89-sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push-89 - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=89 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-89.outputs.tags }} - labels: ${{ steps.meta-89.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-89,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-89,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - - - name: Extract metadata (tags, labels) for Docker - id: meta-89-grpc - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=89-{{version}}-grpc - type=semver,pattern=89-{{major}}.{{minor}}-grpc - type=raw,value=89-latest-grpc - type=raw,value=89-sha-${{ env.GITHUB_SHA_SHORT }}-grpc - - - name: Build and push Docker image - id: build-and-push-89-grpc - uses: docker/build-push-action@v4 - with: - context: . 
- target: grpc - file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=89 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-89-grpc.outputs.tags }} - labels: ${{ steps.meta-89-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-89,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/build_90.yaml b/.github/workflows/build_90.yaml deleted file mode 100644 index e7801f45..00000000 --- a/.github/workflows/build_90.yaml +++ /dev/null @@ -1,122 +0,0 @@ - name: Build and push Cuda Hopper docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-90-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta-90 - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=hopper-{{version}} - type=semver,pattern=hopper-{{major}}.{{minor}} - type=raw,value=hopper-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=hopper-sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push-90 - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=90 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-90.outputs.tags }} - labels: ${{ steps.meta-90.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-90,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-90,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - - - name: Extract metadata (tags, labels) for Docker - id: meta-90-grpc - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=hopper-{{version}}-grpc - type=semver,pattern=hopper-{{major}}.{{minor}}-grpc - type=raw,value=hopper-latest-grpc - type=raw,value=hopper-sha-${{ env.GITHUB_SHA_SHORT }}-grpc - - - name: Build and push Docker image - id: build-and-push-90-grpc - uses: docker/build-push-action@v4 - with: - context: . 
- target: grpc - file: Dockerfile-cuda - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - CUDA_COMPUTE_CAP=90 - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-90-grpc.outputs.tags }} - labels: ${{ steps.meta-90-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-90,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/build_all.yaml b/.github/workflows/build_all.yaml deleted file mode 100644 index f5a6510a..00000000 --- a/.github/workflows/build_all.yaml +++ /dev/null @@ -1,76 +0,0 @@ - name: Build and push Cuda docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-all-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=cuda-{{version}} - type=semver,pattern=cuda-{{major}}.{{minor}} - type=raw,value=cuda-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=cuda-sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile-cuda-all - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-all,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-all,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/build_cpu.yaml b/.github/workflows/build_cpu.yaml deleted file mode 100644 index 032abbbf..00000000 --- a/.github/workflows/build_cpu.yaml +++ /dev/null @@ -1,132 +0,0 @@ - name: Build and push CPU docker image to registry - - on: - workflow_dispatch: - push: - branches: - - 'main' - tags: - - 'v*' - pull_request: - paths: - - ".github/workflows/build.yaml" -# - "integration-tests/**" - - "backends/**" - - "core/**" - - "router/**" - - "Cargo.lock" - - "rust-toolchain.toml" - - "Dockerfile" - branches: - - 'main' - - jobs: - build-and-push-image: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-cpu-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci] - permissions: - contents: write - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - security-events: write - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Configure sccache - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 - with: - install: true - config-inline: | - [registry."docker.io"] - mirrors = ["registry.github-runners.huggingface.tech"] - - - name: Login to GitHub Container Registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta-cpu - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=cpu-{{version}} - type=semver,pattern=cpu-{{major}}.{{minor}} - type=raw,value=cpu-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }} - - - name: Build and push Docker image - id: build-and-push-cpu - uses: docker/build-push-action@v4 - with: - context: . 
- file: Dockerfile - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-cpu.outputs.tags }} - labels: ${{ steps.meta-cpu.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-cpu,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-cpu,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - - - name: Extract metadata (tags, labels) for Docker - id: meta-cpu-grpc - uses: docker/metadata-action@v4.3.0 - with: - images: | - registry-push.github-runners.huggingface.tech/api-inference/text-embeddings-inference - ghcr.io/huggingface/text-embeddings-inference - flavor: | - latest=false - tags: | - type=semver,pattern=cpu-{{version}}-grpc - type=semver,pattern=cpu-{{major}}.{{minor}}-grpc - type=raw,value=cpu-latest-grpc - type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }}-grpc - - - name: Build and push Docker image - id: build-and-push-cpu-grpc - uses: docker/build-push-action@v4 - with: - context: . 
- target: grpc - file: Dockerfile - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - SCCACHE_GHA_ENABLED=on - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} - GIT_SHA=${{ env.GITHUB_SHA }} - DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} - tags: ${{ steps.meta-cpu-grpc.outputs.tags }} - labels: ${{ steps.meta-cpu-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-cpu,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max diff --git a/.github/workflows/matrix.json b/.github/workflows/matrix.json new file mode 100644 index 00000000..c83fcf34 --- /dev/null +++ b/.github/workflows/matrix.json @@ -0,0 +1,57 @@ +[ + { + "name": "turing", + "imageNamePrefix": "turing-", + "runOn": "main", + "sccache": true, + "cudaComputeCap": 75, + "extraBuildArgs": "DEFAULT_USE_FLASH_ATTENTION=False", + "dockerfile": "Dockerfile-cuda" + }, + { + "name": "ampere", + "imageNamePrefix": "", + "runOn": "always", + "sccache": true, + "cudaComputeCap": 80, + "dockerfile": "Dockerfile-cuda" + }, + { + "name": "a10", + "imageNamePrefix": "86-", + "runOn": "main", + "sccache": true, + "cudaComputeCap": 86, + "dockerfile": "Dockerfile-cuda" + }, + { + "name": "RTX 4000", + "imageNamePrefix": "89-", + "runOn": "main", + "sccache": true, + "cudaComputeCap": 89, + "dockerfile": "Dockerfile-cuda" + }, + { + "name": "Hopper", + "imageNamePrefix": "hopper-", + "runOn": "main", + "sccache": true, + "cudaComputeCap": 90, + "dockerfile": "Dockerfile-cuda" + }, + { + "name": "All", + "imageNamePrefix": "cuda-", + "runOn": "main", + "sccache": false, + "dockerfile": "Dockerfile-cuda-all" + }, + { + "name": "cpu", + "imageNamePrefix": "cpu-", + "runOn": "main", + "sccache": true, + "dockerfile": "Dockerfile" + } +]