Skip to content

Commit

Permalink
[apache#4757]feat(trino-connector): Support more partition and sort o…
Browse files Browse the repository at this point in the history
…rder features of the Iceberg catalog (apache#4925)

### What changes were proposed in this pull request?

Support Iceberg partition expressions like: year(x), month(x), day(x),
hour(x), bucket(x, n), truncate(x,n)
Support Iceberg sort order expressions like: field DESC, field ASC,
field DESC NULLS FIRST, field ASC NULLS LAST

### Why are the changes needed?

Fix: apache#4757

### Does this PR introduce _any_ user-facing change?

NO

### How was this patch tested?

New UTs and ITs
  • Loading branch information
diqiu50 authored Oct 22, 2024
1 parent d75c201 commit eded019
Show file tree
Hide file tree
Showing 10 changed files with 776 additions and 99 deletions.
22 changes: 15 additions & 7 deletions trino-connector/integration-test/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,24 @@ dependencies {
testRuntimeOnly(libs.junit.jupiter.engine)
}

tasks.register("setupDependencies") {
dependsOn(":trino-connector:trino-connector:jar")
dependsOn(":catalogs:catalog-lakehouse-iceberg:jar", ":catalogs:catalog-lakehouse-iceberg:runtimeJars")
dependsOn(":catalogs:catalog-jdbc-mysql:jar", ":catalogs:catalog-jdbc-mysql:runtimeJars")
dependsOn(":catalogs:catalog-jdbc-postgresql:jar", ":catalogs:catalog-jdbc-postgresql:runtimeJars")
dependsOn(":catalogs:catalog-hive:jar", ":catalogs:catalog-hive:runtimeJars")
}

tasks.build {
dependsOn("setupDependencies")
}

tasks.test {
val skipITs = project.hasProperty("skipITs")
if (skipITs) {
exclude("**/integration/test/**")
} else {
dependsOn(":trino-connector:trino-connector:jar")
dependsOn(":catalogs:catalog-lakehouse-iceberg:jar", ":catalogs:catalog-lakehouse-iceberg:runtimeJars")
dependsOn(":catalogs:catalog-jdbc-mysql:jar", ":catalogs:catalog-jdbc-mysql:runtimeJars")
dependsOn(":catalogs:catalog-jdbc-postgresql:jar", ":catalogs:catalog-jdbc-postgresql:runtimeJars")
dependsOn(":catalogs:catalog-hive:jar", ":catalogs:catalog-hive:runtimeJars")

dependsOn("setupDependencies")
doFirst {
copy {
from("${project.rootDir}/dev/docker/trino/conf")
Expand Down Expand Up @@ -113,8 +120,9 @@ tasks.test {
}

tasks.register<JavaExec>("TrinoTest") {
dependsOn("build")
classpath = sourceSets["test"].runtimeClasspath
mainClass.set("org.apache.gravitino.integration.test.trino.TrinoQueryTestTool")
mainClass.set("org.apache.gravitino.trino.connector.integration.test.TrinoQueryTestTool")

if (JavaVersion.current() > JavaVersion.VERSION_1_8) {
jvmArgs = listOf(
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ public static void main(String[] args) throws Exception {

if (testSetsDir.isEmpty()) {
testSetsDir = TrinoQueryIT.class.getClassLoader().getResource("trino-ci-testset").getPath();
testSetsDir = ITUtils.joinPath(testSetsDir, "trino-ci-testset/testsets");
testSetsDir = ITUtils.joinPath(testSetsDir, "testsets");
} else {
TrinoQueryIT.testsetsDir = testSetsDir;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
CREATE SCHEMA gt_db2;

USE gt_db2;

CREATE TABLE lineitem(
orderkey bigint,
partkey bigint,
suppkey bigint,
linenumber integer,
quantity decimal(12, 2),
extendedprice decimal(12, 2),
discount decimal(12, 2),
tax decimal(12, 2),
returnflag varchar,
linestatus varchar,
shipdate date,
commitdate date,
receiptdate date,
shipinstruct varchar,
shipmode varchar,
comment varchar
)
WITH (
partitioning = ARRAY['year(commitdate)'],
sorted_by = ARRAY['partkey', 'extendedprice desc']
);

show create table lineitem;

insert into lineitem select * from tpch.tiny.lineitem;

select * from lineitem order by orderkey, partkey limit 5;

CREATE TABLE tb01(
orderkey bigint,
partkey bigint,
suppkey bigint,
linenumber integer,
quantity decimal(12, 2),
extendedprice decimal(12, 2),
discount decimal(12, 2),
tax decimal(12, 2),
returnflag varchar,
linestatus varchar,
shipdate date,
commitdate date,
receiptdate date,
shipinstruct varchar,
shipmode varchar,
comment varchar
)
WITH (
partitioning = ARRAY['day(commitdate)', 'month(shipdate)', 'bucket(partkey, 2)', 'truncate(shipinstruct, 2)'],
sorted_by = ARRAY['partkey asc nulls last', 'extendedprice DESC NULLS FIRST']
);

show create table tb01;

drop table tb01;

drop table lineitem;

drop schema gt_db2;
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
CREATE SCHEMA

USE

CREATE TABLE

"CREATE TABLE %.gt_db2.lineitem (
orderkey bigint,
partkey bigint,
suppkey bigint,
linenumber integer,
quantity decimal(12, 2),
extendedprice decimal(12, 2),
discount decimal(12, 2),
tax decimal(12, 2),
returnflag varchar,
linestatus varchar,
shipdate date,
commitdate date,
receiptdate date,
shipinstruct varchar,
shipmode varchar,
comment varchar
)
COMMENT ''
WITH (
location = 'hdfs://%/user/iceberg/warehouse/TrinoQueryIT/gt_db2%/lineitem',
partitioning = ARRAY['year(commitdate)'],
sorted_by = ARRAY['partkey','extendedprice DESC']
)"

INSERT: 60175 rows

"1","22","48","4","28.00","25816.56","0.09","0.06","N","O","1996-04-21","1996-03-30","1996-05-16","NONE","AIR","lites. fluffily even de"
"1","157","10","6","32.00","33828.80","0.07","0.02","N","O","1996-01-30","1996-02-07","1996-02-03","DELIVER IN PERSON","MAIL","arefully slyly ex"
"1","241","23","5","24.00","27389.76","0.10","0.04","N","O","1996-03-30","1996-03-14","1996-04-01","NONE","FOB"," pending foxes. slyly re"
"1","637","38","3","8.00","12301.04","0.10","0.02","N","O","1996-01-29","1996-03-05","1996-01-31","TAKE BACK RETURN","REG AIR","riously. regular, express dep"
"1","674","75","2","36.00","56688.12","0.09","0.06","N","O","1996-04-12","1996-02-28","1996-04-20","TAKE BACK RETURN","MAIL","ly final dependencies: slyly bold "

CREATE TABLE

"CREATE TABLE %.gt_db2.tb01 (
orderkey bigint,
partkey bigint,
suppkey bigint,
linenumber integer,
quantity decimal(12, 2),
extendedprice decimal(12, 2),
discount decimal(12, 2),
tax decimal(12, 2),
returnflag varchar,
linestatus varchar,
shipdate date,
commitdate date,
receiptdate date,
shipinstruct varchar,
shipmode varchar,
comment varchar
)
COMMENT ''
WITH (
location = 'hdfs://%/user/iceberg/warehouse/TrinoQueryIT/gt_db2%/tb01',
partitioning = ARRAY['day(commitdate)','month(shipdate)','bucket(partkey, 2)','truncate(shipinstruct, 2)'],
sorted_by = ARRAY['partkey ASC NULLS LAST','extendedprice DESC NULLS FIRST']
)"

DROP TABLE

DROP TABLE

DROP SCHEMA
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

set -e

DIR=$(cd "$(dirname "$0")" && pwd)/../..
DIR=$(cd "$(dirname "$0")" && pwd)/../../..
export GRAVITINO_ROOT_DIR=$(cd "$DIR" && pwd)
export GRAVITINO_HOME=$GRAVITINO_ROOT_DIR
export GRAVITINO_TEST=true
export HADOOP_USER_NAME=root
export HADOOP_USER_NAME=anonymous

echo $GRAVITINO_ROOT_DIR
cd $GRAVITINO_ROOT_DIR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public enum GravitinoErrorCode implements ErrorCodeSupplier {
GRAVITINO_OPERATION_FAILED(22, EXTERNAL),
GRAVITINO_RUNTIME_ERROR(23, EXTERNAL),
GRAVITINO_DUPLICATED_CATALOGS(24, EXTERNAL),
;
GRAVITINO_EXPRESSION_ERROR(25, EXTERNAL);

// suppress ImmutableEnumChecker because ErrorCode is outside the project.
@SuppressWarnings("ImmutableEnumChecker")
Expand Down
Loading

0 comments on commit eded019

Please sign in to comment.