From 0a916a470d72ee7a3c52162756aaad4aab6c513f Mon Sep 17 00:00:00 2001
From: "Ariel Shaqed (Scolnicov)"
Date: Thu, 28 Mar 2024 22:31:45 +0200
Subject: [PATCH] Use existing 6MiB file for S3 multipart copy test on Azure
 ADLS2 (#7609)

* Use existing 6MiB file for S3 multipart copy test on Azure ADLS2

  Fixes #7485: it just takes too long to upload some _new_ data there.

* [bug] Use correct path for existing 6 MiB object on ADLS gen2
---
 .github/workflows/esti.yaml              |  1 +
 esti/ops/docker-compose-external-db.yaml |  1 +
 esti/s3_gateway_test.go                  | 58 ++++++++++++++++++------
 pkg/testutil/setup.go                    |  1 +
 4 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/esti.yaml b/.github/workflows/esti.yaml
index 8f02997da8d..c965900b0d2 100644
--- a/.github/workflows/esti.yaml
+++ b/.github/workflows/esti.yaml
@@ -956,6 +956,7 @@ jobs:
           LAKEFS_BLOCKSTORE_TYPE: azure
           ESTI_BLOCKSTORE_TYPE: azure
           ESTI_STORAGE_NAMESPACE: https://esti4hns.blob.core.windows.net/esti-system-testing/${{ github.run_number }}/${{ steps.unique.outputs.value }}
+          ESTI_LARGE_OBJECT_PATH: https://esti4hns.blob.core.windows.net/esti-system-testing-data/copy-test-data/data.6mib
           ESTI_AZURE_STORAGE_ACCOUNT: esti4hns
           ESTI_AZURE_STORAGE_ACCESS_KEY: ${{ secrets.LAKEFS_BLOCKSTORE_AZURE_STORAGE_GEN2_ACCESS_KEY }}
 
diff --git a/esti/ops/docker-compose-external-db.yaml b/esti/ops/docker-compose-external-db.yaml
index 62dddeb0639..7142df73e1b 100644
--- a/esti/ops/docker-compose-external-db.yaml
+++ b/esti/ops/docker-compose-external-db.yaml
@@ -50,6 +50,7 @@ services:
       - ESTI_LAKECTL_DIR=/app
       - ESTI_GOTEST_FLAGS
       - ESTI_FLAGS
+      - ESTI_LARGE_OBJECT_PATH
       - ESTI_FORCE_PATH_STYLE
       - ESTI_AZURE_STORAGE_ACCOUNT
       - ESTI_AZURE_STORAGE_ACCESS_KEY
diff --git a/esti/s3_gateway_test.go b/esti/s3_gateway_test.go
index c7031407822..db1ab98d475 100644
--- a/esti/s3_gateway_test.go
+++ b/esti/s3_gateway_test.go
@@ -2,6 +2,8 @@ package esti
 
 import (
 	"bytes"
+	"context"
+	"fmt"
 	"io"
 	"math/rand"
 	"net/http"
@@ -28,7 +30,8 @@ type GetCredentials = func(id, secret, token string) *credentials.Credentials
 const (
 	numUploads           = 100
 	randomDataPathLength = 1020
-	gatewayTestPrefix    = "main/data/"
+	branch               = "main"
+	gatewayTestPrefix    = branch + "/data/"
 )
 
 func newMinioClient(t *testing.T, getCredentials GetCredentials) *minio.Client {
@@ -329,6 +332,40 @@ func TestS3HeadBucket(t *testing.T) {
 	})
 }
 
+// getOrCreatePathToLargeObject returns the path and size of a large
+// (largeDataContentLength, 6 MiB) object: an existing one when
+// large_object_path is configured, otherwise a fresh upload under branch.
+func getOrCreatePathToLargeObject(t *testing.T, ctx context.Context, s3lakefsClient *minio.Client, repo, branch string) (string, int64) {
+	t.Helper()
+
+	path := "source-file"
+	s3Path := fmt.Sprintf("%s/source-file", branch)
+
+	if physicalAddress := viper.GetString("large_object_path"); physicalAddress != "" {
+		res, err := client.LinkPhysicalAddressWithResponse(ctx, repo, branch, &apigen.LinkPhysicalAddressParams{Path: path}, apigen.LinkPhysicalAddressJSONRequestBody{
+			Checksum: "dont-care",
+			// TODO(ariels): Check actual length of object!
+			SizeBytes: largeDataContentLength,
+			Staging: apigen.StagingLocation{
+				PhysicalAddress: &physicalAddress,
+			},
+		})
+		require.NoError(t, err)
+		require.NotNil(t, res.JSON200)
+		return s3Path, largeDataContentLength
+	}
+
+	// content
+	r := rand.New(rand.NewSource(17))
+	objContent := testutil.NewRandomReader(r, largeDataContentLength)
+
+	// upload data
+	_, err := s3lakefsClient.PutObject(ctx, repo, s3Path, objContent, largeDataContentLength,
+		minio.PutObjectOptions{})
+	require.NoError(t, err)
+	return s3Path, largeDataContentLength
+}
+
 func TestS3CopyObjectMultipart(t *testing.T) {
 	ctx, _, repo := setupTest(t)
 	defer tearDownTest(repo)
@@ -338,17 +375,10 @@ func TestS3CopyObjectMultipart(t *testing.T) {
 	destRepo := createRepositoryByName(ctx, t, destRepoName)
 	defer deleteRepositoryIfAskedTo(ctx, destRepoName)
 
-	// content
-	r := rand.New(rand.NewSource(17))
-	objContent := testutil.NewRandomReader(r, largeDataContentLength)
-	srcPath := gatewayTestPrefix + "source-file"
-	destPath := gatewayTestPrefix + "dest-file"
-
-	// upload data
 	s3lakefsClient := newMinioClient(t, credentials.NewStaticV4)
-	_, err := s3lakefsClient.PutObject(ctx, repo, srcPath, objContent, largeDataContentLength,
-		minio.PutObjectOptions{})
-	require.NoError(t, err)
+
+	srcPath, objectLength := getOrCreatePathToLargeObject(t, ctx, s3lakefsClient, repo, branch)
+	destPath := gatewayTestPrefix + "dest-file"
 
 	dest := minio.CopyDestOptions{
 		Bucket: destRepo,
@@ -367,7 +397,7 @@ func TestS3CopyObjectMultipart(t *testing.T) {
 			Object:     srcPath,
 			MatchRange: true,
 			Start:      minDataContentLengthForMultipart,
-			End:        largeDataContentLength - 1,
+			End:        objectLength - 1,
 		},
 	}
 
@@ -374,10 +404,10 @@ func TestS3CopyObjectMultipart(t *testing.T) {
 	ui, err := s3lakefsClient.ComposeObject(ctx, dest, srcs...)
 	if err != nil {
 		t.Fatalf("minio.Client.ComposeObject from(%+v) to(%+v): %s", srcs, dest, err)
 	}
 
-	if ui.Size != largeDataContentLength {
-		t.Errorf("Copied %d bytes when expecting %d", ui.Size, largeDataContentLength)
+	if ui.Size != objectLength {
+		t.Errorf("Copied %d bytes when expecting %d", ui.Size, objectLength)
 	}
 
 	// Comparing 2 readers is too much work. Instead just hash them.
diff --git a/pkg/testutil/setup.go b/pkg/testutil/setup.go
index a1f50a75004..5e3897de9fc 100644
--- a/pkg/testutil/setup.go
+++ b/pkg/testutil/setup.go
@@ -59,6 +59,7 @@ func SetupTestingEnv(params *SetupTestingEnvParams) (logging.Logger, apigen.Clie
 	viper.SetDefault("lakectl_dir", filepath.Join(currDir, ".."))
 	viper.SetDefault("azure_storage_account", "")
 	viper.SetDefault("azure_storage_access_key", "")
+	viper.SetDefault("large_object_path", "")
 	err = viper.ReadInConfig()
 	if err != nil && !errors.As(err, &viper.ConfigFileNotFoundError{}) {
 		logger.WithError(err).Fatal("Failed to read configuration")
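
Usage note: the new code path reads the viper key "large_object_path", which the Esti harness is assumed to populate from the ESTI_LARGE_OBJECT_PATH environment variable that the workflow and docker-compose changes above wire through. The following standalone Go sketch (hypothetical, not part of the patch) illustrates that assumed env-to-key mapping:

	package main

	import (
		"fmt"

		"github.com/spf13/viper"
	)

	func main() {
		// Assumed to mirror pkg/testutil/setup.go: with an "esti" env prefix
		// and AutomaticEnv, reading the lower-case key "large_object_path"
		// consults the ESTI_LARGE_OBJECT_PATH environment variable.
		viper.SetEnvPrefix("esti")
		viper.AutomaticEnv()
		viper.SetDefault("large_object_path", "")

		if p := viper.GetString("large_object_path"); p != "" {
			fmt.Println("linking existing large object at", p)
		} else {
			fmt.Println("no large object configured; the test uploads fresh 6 MiB data")
		}
	}

Note that, per the TODO in getOrCreatePathToLargeObject, the linked object's size is not verified: the object at ESTI_LARGE_OBJECT_PATH must actually be largeDataContentLength (6 MiB) long, or the range and size assertions in TestS3CopyObjectMultipart will fail.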