From 6da2f79fe2d69e3d32ca372c4ae407ab66002872 Mon Sep 17 00:00:00 2001 From: Kang Date: Thu, 27 Feb 2025 10:56:19 +0800 Subject: [PATCH] [#6504] feat(iceberg): Support s3 path-style-access properties for Iceberg catalog and Iceberg REST server (#6541) ### What changes were proposed in this pull request? Support s3 path-style-access properties in Iceberg REST server ### Why are the changes needed? Fix: #6504 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Manual --- .../catalog/lakehouse/iceberg/IcebergConstants.java | 1 + .../lakehouse/iceberg/IcebergPropertiesUtils.java | 2 ++ .../org/apache/gravitino/storage/S3Properties.java | 3 +++ docs/iceberg-rest-service.md | 11 ++++++----- docs/lakehouse-iceberg-catalog.md | 2 ++ docs/spark-connector/spark-catalog-iceberg.md | 1 + .../gravitino/iceberg/common/IcebergConfig.java | 7 +++++++ 7 files changed, 22 insertions(+), 5 deletions(-) diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java index 214f3811379..25a7fda925d 100644 --- a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergConstants.java @@ -41,6 +41,7 @@ public class IcebergConstants { public static final String ICEBERG_S3_ACCESS_KEY_ID = "s3.access-key-id"; public static final String ICEBERG_S3_SECRET_ACCESS_KEY = "s3.secret-access-key"; public static final String ICEBERG_S3_TOKEN = "s3.session-token"; + public static final String ICEBERG_S3_PATH_STYLE_ACCESS = "s3.path-style-access"; public static final String AWS_S3_REGION = "client.region"; public static final String ICEBERG_OSS_ENDPOINT = "oss.endpoint"; diff --git 
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java index 92c5d18a129..dc2ad6d1a0d 100644 --- a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/iceberg/IcebergPropertiesUtils.java @@ -52,6 +52,8 @@ public class IcebergPropertiesUtils { map.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, IcebergConstants.ICEBERG_S3_ACCESS_KEY_ID); map.put( S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, IcebergConstants.ICEBERG_S3_SECRET_ACCESS_KEY); + map.put( + S3Properties.GRAVITINO_S3_PATH_STYLE_ACCESS, IcebergConstants.ICEBERG_S3_PATH_STYLE_ACCESS); // OSS map.put(OSSProperties.GRAVITINO_OSS_ENDPOINT, IcebergConstants.ICEBERG_OSS_ENDPOINT); map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, IcebergConstants.ICEBERG_OSS_ACCESS_KEY_ID); diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/S3Properties.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/S3Properties.java index cfb342c5b5f..8d70eacb313 100644 --- a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/S3Properties.java +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/S3Properties.java @@ -40,5 +40,8 @@ public class S3Properties { // The S3 credentials provider class name. public static final String GRAVITINO_S3_CREDS_PROVIDER = "s3-creds-provider"; + // The S3 path style access flag. 
+ public static final String GRAVITINO_S3_PATH_STYLE_ACCESS = "s3-path-style-access"; + private S3Properties() {} } diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index 6ea4a8bb05a..9a0c1d159b7 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -107,11 +107,12 @@ Please refer to [Credential vending](./security/credential-vending.md) for more #### S3 configuration -| Configuration item | Description | Default value | Required | Since Version | -|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------------------------|------------------| -| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aws.s3.S3FileIO` for S3. | (none) | No | 0.6.0-incubating | -| `gravitino.iceberg-rest.s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | (none) | No | 0.6.0-incubating | -| `gravitino.iceberg-rest.s3-region` | The region of the S3 service, like `us-west-2`. 
| (none) | No | 0.6.0-incubating | +| Configuration item | Description | Default value | Required | Since Version | +|-----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------------------------|------------------| +| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aws.s3.S3FileIO` for S3. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-region` | The region of the S3 service, like `us-west-2`. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-path-style-access` | Whether to use path style access for S3. | false | No | 0.9.0-incubating | For other Iceberg s3 properties not managed by Gravitino like `s3.sse.type`, you could config it directly by `gravitino.iceberg-rest.s3.sse.type`. diff --git a/docs/lakehouse-iceberg-catalog.md b/docs/lakehouse-iceberg-catalog.md index 8fb9c61f327..8a7d7129263 100644 --- a/docs/lakehouse-iceberg-catalog.md +++ b/docs/lakehouse-iceberg-catalog.md @@ -76,6 +76,8 @@ Supports using static access-key-id and secret-access-key to access S3 data. | `s3-secret-access-key` | The static secret access key used to access S3 data. | (none) | No | 0.6.0-incubating | | `s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. 
| (none) | No | 0.6.0-incubating | | `s3-region` | The region of the S3 service, like `us-west-2`. | (none) | No | 0.6.0-incubating | +| `s3-path-style-access` | Whether to use path style access for S3. | false | No | 0.9.0-incubating | + For other Iceberg s3 properties not managed by Gravitino like `s3.sse.type`, you could config it directly by `gravitino.bypass.s3.sse.type`. diff --git a/docs/spark-connector/spark-catalog-iceberg.md b/docs/spark-connector/spark-catalog-iceberg.md index e35473c0e31..9c3e931556a 100644 --- a/docs/spark-connector/spark-catalog-iceberg.md +++ b/docs/spark-connector/spark-catalog-iceberg.md @@ -114,6 +114,7 @@ Gravitino spark connector will transform below property names which are defined | `s3-region` | `client.region` | The region of the S3 service, like `us-west-2`. | 0.6.0-incubating | | `s3-access-key-id` | `s3.access-key-id` | The static access key ID used to access S3 data. | 0.8.0-incubating | | `s3-secret-access-key` | `s3.secret-access-key` | The static secret access key used to access S3 data. | 0.8.0-incubating | +| `s3-path-style-access` | `s3.path-style-access` | Whether to use path style access for S3. | 0.9.0-incubating | | `oss-endpoint` | `oss.endpoint` | The endpoint of Aliyun OSS service. | 0.7.0-incubating | | `oss-access-key-id` | `client.access-key-id` | The static access key ID used to access OSS data. | 0.8.0-incubating | | `oss-secret-access-key` | `client.access-key-secret` | The static secret access key used to access OSS data. 
| 0.8.0-incubating | diff --git a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java index 638d0c6d311..59e67d35e94 100644 --- a/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java +++ b/iceberg/iceberg-common/src/main/java/org/apache/gravitino/iceberg/common/IcebergConfig.java @@ -144,6 +144,13 @@ public class IcebergConfig extends Config implements OverwriteDefaultConfig { .stringConf() .create(); + public static final ConfigEntry<Boolean> S3_PATH_STYLE_ACCESS = + new ConfigBuilder(S3Properties.GRAVITINO_S3_PATH_STYLE_ACCESS) + .doc("Whether to use path style access for S3") + .version(ConfigConstants.VERSION_0_9_0) + .booleanConf() + .createWithDefault(false); + public static final ConfigEntry<String> OSS_ENDPOINT = new ConfigBuilder(OSSProperties.GRAVITINO_OSS_ENDPOINT) .doc("The endpoint of Aliyun OSS service")