From 8aea930151c711b5d9789e535d60301906ee169c Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Wed, 29 Jan 2025 10:35:53 +0100 Subject: [PATCH 1/9] (FIX): CI Security Fix - branchname injection --- .github/workflows/build.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 8a2880a8..fa0f95f5 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -31,8 +31,10 @@ jobs: uses: actions/checkout@v3 - id: set-matrix + env: + GITHUB_REF: ${{ github.ref }} run: | - branchName=$(echo '${{ github.ref }}' | sed 's,refs/heads/,,g') + branchName=$(echo $GITHUB_REF | sed 's,refs/heads/,,g') matrix=$(jq --arg branchName "$branchName" 'map(. | select((.runOn==$branchName) or (.runOn=="always")) )' .github/workflows/matrix.json) echo "{\"include\":$(echo $matrix)}" echo ::set-output name=matrix::{\"include\":$(echo $matrix)}\" From 40bd2e3c3ab1fb5cd98dbcaa616268affc1b207a Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Wed, 29 Jan 2025 12:08:12 +0100 Subject: [PATCH 2/9] Change AWS Auth + disable build summary --- .github/workflows/build.yaml | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index fa0f95f5..442f5634 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -91,6 +91,15 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: configure aws credentials + id: aws-creds + uses: aws-actions/configure-aws-credentials@4fc4975a852c8cd99761e2de1f4ba73402e44dd9 + with: + role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_BUILDX_CACHE }} + role-duration-seconds: 7200 + aws-region: us-east-1 + output-credentials: true + - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 @@ -105,10 +114,12 @@ jobs: type=semver,pattern=${{ matrix.imageNamePrefix }}{{major}}.{{minor}} type=raw,value=${{ matrix.imageNamePrefix }}latest type=raw,value=${{ matrix.imageNamePrefix }}sha-${{ env.GITHUB_SHA_SHORT }} - + - name: Build and push Docker image id: build-and-push uses: docker/build-push-action@v6 + env: + DOCKER_BUILD_SUMMARY: false with: context: . file: ${{ matrix.dockerfile }} @@ -124,8 +135,8 @@ jobs: ${{matrix.extraBuildArgs}} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max - cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max + cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},mode=max + cache-to: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},mode=max - name: Extract metadata (tags, labels) for Docker id: meta-grpc @@ -147,6 +158,8 @@ jobs: id: build-and-push-grpc if: ${{ matrix.grpc }} uses: docker/build-push-action@v6 + env: + DOCKER_BUILD_SUMMARY: false with: context: . target: grpc @@ -163,4 +176,4 @@ jobs: ${{matrix.extraBuildArgs}} tags: ${{ steps.meta-grpc.outputs.tags }} labels: ${{ steps.meta-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max + cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},mode=max From 2ce2a0c16a8a5fb77961c0b0132058a4d08ffdf0 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Wed, 29 Jan 2025 12:26:53 +0100 Subject: [PATCH 3/9] WIP aws oidc --- .github/workflows/build.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 442f5634..d6469fec 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -135,8 +135,8 @@ jobs: ${{matrix.extraBuildArgs}} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},mode=max - cache-to: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},mode=max + cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.creds.outputs.aws-session-token }},mode=max + cache-to: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.creds.outputs.aws-session-token }},mode=max - name: Extract metadata (tags, labels) for Docker id: meta-grpc @@ -176,4 +176,4 @@ jobs: ${{matrix.extraBuildArgs}} tags: ${{ steps.meta-grpc.outputs.tags }} labels: ${{ steps.meta-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},mode=max + cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.creds.outputs.aws-session-token }},mode=max From 2ace258cda3009756e042b79fcd45ea292d9215d Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Wed, 29 Jan 2025 13:07:24 +0100 Subject: [PATCH 4/9] WIP --- .github/workflows/build.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d6469fec..0ef095b9 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -135,8 +135,8 @@ jobs: ${{matrix.extraBuildArgs}} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.creds.outputs.aws-session-token }},mode=max - cache-to: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.creds.outputs.aws-session-token }},mode=max + cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.aws-creds.outputs.aws-session-token }},mode=max + cache-to: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.aws-creds.outputs.aws-session-token }},mode=max - name: Extract metadata (tags, labels) for Docker id: meta-grpc @@ -176,4 +176,4 @@ jobs: ${{matrix.extraBuildArgs}} tags: ${{ steps.meta-grpc.outputs.tags }} labels: ${{ steps.meta-grpc.outputs.labels }} - cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.creds.outputs.aws-session-token }},mode=max + cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.aws-creds.outputs.aws-session-token }},mode=max From 5ad1cf289f7c169d6a31e0658aed74b344a009f8 Mon Sep 17 00:00:00 2001 From: glegendre01 Date: Wed, 29 Jan 2025 16:10:21 +0100 Subject: [PATCH 5/9] CI Security Fix : expose action cache url and runtime as secrets --- .github/workflows/build.yaml | 10 ++++++---- Dockerfile | 10 ++++++---- Dockerfile-cuda | 10 ++++++---- Dockerfile-cuda-all | 14 +++++++++----- Dockerfile-intel | 10 ++++++---- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0ef095b9..202012e3 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -127,12 +127,13 @@ jobs: platforms: 'linux/amd64' build-args: | SCCACHE_GHA_ENABLED=${{ matrix.sccache }} - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} CUDA_COMPUTE_CAP=${{ matrix.cudaComputeCap }} GIT_SHA=${{ env.GITHUB_SHA }} DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} ${{matrix.extraBuildArgs}} + secrets: | + actions_cache_url=${{ env.ACTIONS_CACHE_URL }} + actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.aws-creds.outputs.aws-session-token }},mode=max @@ -168,12 +169,13 @@ jobs: platforms: 'linux/amd64' build-args: | SCCACHE_GHA_ENABLED=${{ matrix.sccache }} - ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} - ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} CUDA_COMPUTE_CAP=${{ matrix.cudaComputeCap }} GIT_SHA=${{ env.GITHUB_SHA }} DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }} ${{matrix.extraBuildArgs}} + secrets: | + actions_cache_url=${{ env.ACTIONS_CACHE_URL }} + actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }} tags: ${{ steps.meta-grpc.outputs.tags }} labels: ${{ steps.meta-grpc.outputs.labels }} cache-from: type=s3,region=us-east-1,bucket=${{ vars.AWS_S3BUCKET_GITHUB_BUILDX_CACHE }},name=text-embeddings-inference-cache-${{matrix.name}},access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }},secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }},session_token=${{ steps.aws-creds.outputs.aws-session-token }},mode=max diff --git a/Dockerfile b/Dockerfile index b886360b..b5ca01fb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,8 +24,6 @@ ARG GIT_SHA ARG DOCKER_LABEL # sccache specific variables -ARG ACTIONS_CACHE_URL -ARG ACTIONS_RUNTIME_TOKEN ARG SCCACHE_GHA_ENABLED RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ @@ -53,7 +51,9 @@ COPY Cargo.lock ./ FROM builder AS http-builder -RUN cargo build --release --bin text-embeddings-router -F ort -F candle -F mkl-dynamic -F http --no-default-features && sccache -s +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + cargo build --release --bin text-embeddings-router -F ort -F candle -F mkl-dynamic -F http --no-default-features && sccache -s FROM builder AS grpc-builder @@ -65,7 +65,9 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ COPY proto proto -RUN cargo build --release --bin text-embeddings-router -F grpc -F ort -F candle -F mkl-dynamic --no-default-features && sccache -s +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + cargo build --release --bin text-embeddings-router -F grpc -F ort -F candle -F mkl-dynamic --no-default-features && sccache -s FROM debian:bookworm-slim AS base diff --git a/Dockerfile-cuda b/Dockerfile-cuda index 1e39fb1e..00026b83 100644 --- a/Dockerfile-cuda +++ b/Dockerfile-cuda @@ -41,8 +41,6 @@ ARG CARGO_BUILD_JOBS ARG CARGO_BUILD_INCREMENTAL # sccache specific variables -ARG ACTIONS_CACHE_URL -ARG ACTIONS_RUNTIME_TOKEN ARG SCCACHE_GHA_ENABLED WORKDIR /usr/src @@ -77,7 +75,9 @@ COPY Cargo.lock ./ FROM builder AS http-builder -RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ then \ cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F http --no-default-features && sccache -s; \ else \ @@ -98,7 +98,9 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ COPY proto proto -RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ then \ cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F grpc --no-default-features && sccache -s; \ else \ diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 851a3c97..aba5a615 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -36,8 +36,6 @@ ARG DOCKER_LABEL ARG VERTEX="false" # sccache specific variables -ARG ACTIONS_CACHE_URL -ARG ACTIONS_RUNTIME_TOKEN ARG SCCACHE_GHA_ENABLED # Limit parallelism @@ -83,7 +81,9 @@ COPY router router COPY Cargo.toml ./ COPY Cargo.lock ./ -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ else \ @@ -92,7 +92,9 @@ RUN if [ $VERTEX = "true" ]; \ RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ else \ @@ -101,7 +103,9 @@ RUN if [ $VERTEX = "true" ]; \ RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ else \ diff --git a/Dockerfile-intel b/Dockerfile-intel index 4b0e7e04..e1cee734 100644 --- a/Dockerfile-intel +++ b/Dockerfile-intel @@ -24,8 +24,6 @@ ARG GIT_SHA ARG DOCKER_LABEL # sccache specific variables -ARG ACTIONS_CACHE_URL -ARG ACTIONS_RUNTIME_TOKEN ARG SCCACHE_GHA_ENABLED COPY --from=planner /usr/src/recipe.json recipe.json @@ -46,13 +44,17 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ FROM builder as http-builder -RUN cargo build --release --bin text-embeddings-router -F python -F http --no-default-features && sccache -s +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + cargo build --release --bin text-embeddings-router -F python -F http --no-default-features && sccache -s FROM builder as grpc-builder COPY proto proto -RUN cargo build --release --bin text-embeddings-router -F grpc -F python --no-default-features && sccache -s +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + cargo build --release --bin text-embeddings-router -F grpc -F python --no-default-features && sccache -s FROM intel/intel-optimized-pytorch:2.4.0-pip-base AS cpu ENV HUGGINGFACE_HUB_CACHE=/data \ From 90078008ebc5b045780775947a168d346fb04d0a Mon Sep 17 00:00:00 2001 From: glegendre01 Date: Wed, 29 Jan 2025 16:17:02 +0100 Subject: [PATCH 6/9] WIP --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b5ca01fb..87b6293e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,9 @@ RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ COPY --from=planner /usr/src/recipe.json recipe.json -RUN cargo chef cook --release --features ort --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + cargo chef cook --release --features ort --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core From 37565fcf944f2d00542d8ca25be202da55e2600b Mon Sep 17 00:00:00 2001 From: glegendre01 Date: Wed, 29 Jan 2025 16:33:57 +0100 Subject: [PATCH 7/9] WIP --- Dockerfile-cuda | 8 ++++++-- Dockerfile-cuda-all | 16 ++++++++++++---- Dockerfile-intel | 4 +++- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Dockerfile-cuda b/Dockerfile-cuda index 00026b83..efd19bb8 100644 --- a/Dockerfile-cuda +++ b/Dockerfile-cuda @@ -45,7 +45,9 @@ ARG SCCACHE_GHA_ENABLED WORKDIR /usr/src -RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ then \ nvprune --generate-code code=sm_${CUDA_COMPUTE_CAP} /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \ elif [ ${CUDA_COMPUTE_CAP} -ge 80 -a ${CUDA_COMPUTE_CAP} -lt 90 ]; \ @@ -60,7 +62,9 @@ RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ COPY --from=planner /usr/src/recipe.json recipe.json -RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \ then \ cargo chef cook --release --features candle-cuda-turing --features static-linking --no-default-features --recipe-path recipe.json && sccache -s; \ else \ diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index aba5a615..8db86cd9 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -47,28 +47,36 @@ WORKDIR /usr/src COPY --from=planner /usr/src/recipe.json recipe.json -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ else \ cargo chef cook --release --recipe-path recipe.json && sccache -s; \ fi; -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ else \ CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ fi; -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ else \ CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ fi; -RUN if [ $VERTEX = "true" ]; \ +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + if [ $VERTEX = "true" ]; \ then \ CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ else \ diff --git a/Dockerfile-intel b/Dockerfile-intel index e1cee734..3412bf2c 100644 --- a/Dockerfile-intel +++ b/Dockerfile-intel @@ -28,7 +28,9 @@ ARG SCCACHE_GHA_ENABLED COPY --from=planner /usr/src/recipe.json recipe.json -RUN cargo chef cook --release --features python --no-default-features --recipe-path recipe.json && sccache -s +RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + cargo chef cook --release --features python --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core From 89bbb510f23ca31ca8957aa71725a26cb6da2fe7 Mon Sep 17 00:00:00 2001 From: glegendre01 Date: Wed, 29 Jan 2025 21:01:35 +0100 Subject: [PATCH 8/9] run FMT --- .github/workflows/build.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 202012e3..1c41b611 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -99,7 +99,7 @@ jobs: role-duration-seconds: 7200 aws-region: us-east-1 output-credentials: true - + - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 @@ -114,11 +114,11 @@ jobs: type=semver,pattern=${{ matrix.imageNamePrefix }}{{major}}.{{minor}} type=raw,value=${{ matrix.imageNamePrefix }}latest type=raw,value=${{ matrix.imageNamePrefix }}sha-${{ env.GITHUB_SHA_SHORT }} - + - name: Build and push Docker image id: build-and-push uses: docker/build-push-action@v6 - env: + env: DOCKER_BUILD_SUMMARY: false with: context: . @@ -159,7 +159,7 @@ jobs: id: build-and-push-grpc if: ${{ matrix.grpc }} uses: docker/build-push-action@v6 - env: + env: DOCKER_BUILD_SUMMARY: false with: context: . From 2579bfb564156c7ad916f56a2e5404245c404bc8 Mon Sep 17 00:00:00 2001 From: glegendre01 Date: Wed, 29 Jan 2025 21:07:57 +0100 Subject: [PATCH 9/9] fmt --- backends/proto/embed.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/proto/embed.proto b/backends/proto/embed.proto index 84212a2d..036f3db4 100644 --- a/backends/proto/embed.proto +++ b/backends/proto/embed.proto @@ -37,4 +37,4 @@ message Score { message PredictResponse { repeated Score scores = 1; -} \ No newline at end of file +}