From 44701c1c8eab6ac84c2fafc9ae505a6ac68dfe73 Mon Sep 17 00:00:00 2001 From: Andreas Auernhammer Date: Wed, 29 Jan 2025 22:56:16 +0100 Subject: [PATCH] vault: limit token delay to not exceed token TTL This commit fixes a bug in the Vault token renewal. When KES renews its current token `T1` and receives a new token `T2`, KES waits a certain amount of time before using `T2` to account for replication lag between Vault nodes. Vault tokens are not signed but static secrets. Each Vault node in a dist. cluster needs to know the token before being able to verify it. Without the usage delay described above, KES might send a request to a Vault node that has not received the new token `T2`, yet. Now, KES must also not wait longer then the remaining TTL of the current token `T1`. Otherwise, `T1` expires BEFORE KES starts using `T2`. This results in auth errors like the following: ``` Error making API request.\n\nURL: GET http://127.0.0.1:8200/v1/kv/data/my-key3\nCode: 403. Errors:\n\n* 2 errors occurred:\n\t* permission denied\n\t* invalid token\n\n" req.method=POST req.path=/v1/key/create/my-key3 req.ip=127.0.0.1 req.identity=a49fea12c5d1c69f1eba1e4697e62ccdbe389a80f317191892711b47d83c3e85 ``` This commit limits the max. time KES waits before using the new token `T2` to either half the remaining TTL of `T1` or 30s - whatever is shorter. This ensures that `T1` is still valid once KES switches to `T2`. Signed-off-by: Andreas Auernhammer --- .github/workflows/go.yml | 10 ++++------ .github/workflows/release.yml | 3 +-- go.mod | 2 ++ internal/keystore/vault/client.go | 14 +++++++++++--- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 87acfe44..1b776d9c 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -16,8 +16,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.22.7 - check-latest: true + go-version: 1.22.0 id: go - name: Check out code uses: actions/checkout@v4 @@ -34,7 +33,7 @@ jobs: - name: "Set up Go" uses: actions/setup-go@v5 with: - go-version: 1.22.7 + go-version: 1.22.0 id: go - name: Check out code uses: actions/checkout@v4 @@ -54,8 +53,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.22.7 - check-latest: true + go-version: 1.22.0 id: go - name: Check out code uses: actions/checkout@v4 @@ -70,7 +68,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go-version: [1.22.7, 1.23.1] + go-version: [1.22.0, 1.23.0] steps: - name: Set up Go ${{ matrix.go-version }} uses: actions/setup-go@v5 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index acd9ede5..51855bfe 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,8 +20,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.22.7 - check-latest: true + go-version: 1.22.0 - name: Set up QEMU uses: docker/setup-qemu-action@v1 - name: Set up Docker Buildx diff --git a/go.mod b/go.mod index 427047ed..4de37616 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,8 @@ module github.com/minio/kes go 1.21 +toolchain go1.22.11 + require ( aead.dev/mem v0.2.0 aead.dev/minisign v0.2.1 diff --git a/internal/keystore/vault/client.go b/internal/keystore/vault/client.go index 9df392da..6034117a 100644 --- a/internal/keystore/vault/client.go +++ b/internal/keystore/vault/client.go @@ -7,6 +7,7 @@ package vault import ( "context" "errors" + "net/http" "os" "path" "strings" @@ -195,8 +196,10 @@ func (c *client) RenewToken(ctx context.Context, authenticate authFunc, secret * continue } - renewIn := 80 * (ttl / 100) // Renew token after 80% of its TTL has passed - ttl = 0 // Set TTL to zero to trigger an immediate re-authentication in case of auth failure + renewIn := 80 * (ttl / 100) // Renew token after 80% of its TTL has passed + delay := min((ttl-renewIn)/2, Delay) // Delay usage of renewed token but not beyond expiry + ttl = 0 + select { case <-ctx.Done(): return @@ -210,6 +213,9 @@ func (c *client) RenewToken(ctx context.Context, authenticate authFunc, secret * if err == nil { break } + if resp, ok := err.(*vaultapi.ResponseError); ok && http.StatusBadRequest <= resp.StatusCode && resp.StatusCode < http.StatusInternalServerError { + break // Don't retry on 4xx responses + } } if s == nil { s, _ = authenticate(ctx) @@ -225,10 +231,12 @@ func (c *client) RenewToken(ctx context.Context, authenticate authFunc, secret * // Wait before we use the new auth. token. This accounts // for replication lag between the Vault nodes and allows // them to sync the token across the entire cluster. + // However, we must not wait longer than the remaining lifetime + // of the currently used token. select { case <-ctx.Done(): return - case <-time.After(Delay): + case <-time.After(delay): } c.SetToken(token) // SetToken is safe to call from different go routines }