Skip to content

Commit

Permalink
[apache#1449] feat(catalogs): Introduce new module bundled-catalog
Browse files Browse the repository at this point in the history
…for query engine. (apache#1454)

### What changes were proposed in this pull request?

- Add a new module `bundled-catalog` to hold common information like
property meta of all catalogs
- Add `bundled-catalog` as a dependency of `trino-connector` and verify
that everything works.

### Why are the changes needed?

We need a common module to hold the property meta of all catalogs so
that any query engine that depends on it can use that metadata when querying.

Fix: apache#1449 

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

Added unit tests in the `trino-connector` module.
  • Loading branch information
yuqi1129 authored Jan 15, 2024
1 parent 3f35ed1 commit 04c87b8
Show file tree
Hide file tree
Showing 18 changed files with 239 additions and 22 deletions.
5 changes: 3 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ tasks {
subprojects.forEach() {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("client") && it.name != "trino-connector" &&
it.name != "integration-test"
it.name != "integration-test" && it.name != "bundled-catalog"
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
Expand All @@ -466,7 +466,8 @@ tasks {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("client") &&
it.name != "trino-connector" &&
it.name != "integration-test"
it.name != "integration-test" &&
it.name != "bundled-catalog"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
Expand Down
76 changes: 76 additions & 0 deletions catalogs/bundled-catalog/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar

// Build script for the `bundled-catalog` module. It bundles classes from the catalog modules
// listed under `dependencies` into a single shadow jar.
plugins {
id("java")
alias(libs.plugins.shadow)
}

// Modules whose classes this module references and bundles.
dependencies {
implementation(project(":core"))
implementation(project(":catalogs:catalog-hive"))
implementation(project(":catalogs:catalog-lakehouse-iceberg"))
implementation(project(":catalogs:catalog-jdbc-mysql"))
implementation(project(":catalogs:catalog-jdbc-postgresql"))
}

tasks.withType<ShadowJar>(ShadowJar::class.java) {
// Enable the Zip64 format so the archive is not limited to 65535 entries.
isZip64 = true
// Bundle what is on the compile classpath of this project.
configurations = listOf(project.configurations.compileClasspath.get())
// Publish the shadow jar without a classifier.
archiveClassifier.set("")

// NOTE(review): the arguments below are plain strings, not dependency specs; confirm which
// `exclude` overload the Kotlin DSL resolves here and that it filters what is intended.
dependencies {
exclude("org.*")
exclude("javax.*")
}

// Strip third-party packages and non-class resources from the bundled jar.
exclude("**/package-info.class")
exclude("**/*.properties")
exclude("**/*.html")
exclude("org/**")
exclude("META-INF")
exclude("META-INF/**")
exclude("module-info.class")
exclude("com/google/**")
exclude("com/fasterxml/**")
exclude("javax/**")
exclude("schema/**")
exclude("fr/**")
exclude("google/**")
exclude("groovy/**")
exclude("images/**")
exclude("**/*.conf")
exclude("**/*.so")
exclude("**/*.sxd")
exclude("**/*.xsd")
exclude("*.ddl")
exclude("**/*.txt")
exclude("**/*.md")
exclude("**/*.dtd")
exclude("**/*.thrift")
exclude("**/*.jdo")
exclude("**/LICENSE")
exclude("**/*.MF")
exclude("**/*.xml")
exclude("*.proto")
exclude("*.template")
exclude("webapps")
exclude("license/*")
exclude("*.xml")
exclude("*.css")
exclude("*.jnilib")
exclude("*.dll")
exclude("*.jocl")
exclude("NOTICE")

// Let the shadow plugin drop dependency classes that this project's code does not use.
minimize()
}

// The plain jar is built after the shadow jar and carries the `empty` classifier,
// presumably so it does not collide with the unclassified shadow jar (confirm).
tasks.jar {
dependsOn(tasks.named("shadowJar"))
archiveClassifier.set("empty")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.catalog.common;

import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertyEntry;
import com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta;
import com.datastrato.gravitino.catalog.hive.HiveSchemaPropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergCatalogPropertiesMetadata;
import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergSchemaPropertiesMetadata;
import com.datastrato.gravitino.catalog.lakehouse.iceberg.IcebergTablePropertiesMetadata;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * The {@link ClassProvider} class serves as a container for the classes the Gravitino query
 * engine needs, with a primary focus on classes related to property metadata.
 *
 * <p>Purpose of this module and class:
 *
 * <ul>
 *   <li>Catalog-related classes are essential for the query engine to directly access catalog
 *       information.
 *   <li>The query engine should be able to detect catalog changes and automatically reload
 *       catalog-related information to ensure synchronization.
 *   <li>Including catalog-related jar packages directly is suboptimal for query engines as it
 *       may introduce unnecessary content.
 * </ul>
 *
 * <p>Therefore, this module is used to store the required classes for the query engine's
 * functionality.
 *
 * <p>The sets below are not read anywhere in this class; they only hold references to the
 * listed classes. Each set is unmodifiable.
 */
public class ClassProvider {

  /** Property-metadata base types shared by every catalog. */
  private static final Set<Class<?>> BASE_CLASS =
      setOf(BasePropertiesMetadata.class, PropertyEntry.class, PropertiesMetadata.class);

  /** Property-metadata classes of the Hive catalog. */
  private static final Set<Class<?>> HIVE_NEED_CLASS =
      setOf(
          HiveTablePropertiesMetadata.class,
          HiveSchemaPropertiesMetadata.class,
          HiveCatalogPropertiesMeta.class);

  /** Property-metadata classes of the Iceberg lakehouse catalog. */
  private static final Set<Class<?>> ICEBERG_NEED_CLASS =
      setOf(
          IcebergTablePropertiesMetadata.class,
          IcebergSchemaPropertiesMetadata.class,
          IcebergCatalogPropertiesMetadata.class);

  // TODO: register the MySQL catalog classes.
  private static final Set<Class<?>> MYSQL_NEED_CLASS = setOf();

  // TODO: register the PostgreSQL catalog classes.
  private static final Set<Class<?>> PG_NEED_CLASS = setOf();

  /**
   * Builds an unmodifiable set from the given classes.
   *
   * <p>Used instead of double-brace initialization, which would create one anonymous {@link
   * HashSet} subclass per constant and leave the sets mutable.
   *
   * @param classes the classes to include; may be empty
   * @return an unmodifiable set containing exactly the given classes
   */
  private static Set<Class<?>> setOf(Class<?>... classes) {
    return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(classes)));
  }
}
8 changes: 6 additions & 2 deletions catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,16 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
// Why copy the dependencies into `build/libs_all` instead of `build/libs`? Because `build/libs` is the
// output directory of both the task `build` and the task `copyDepends`. Task `shadowJar` of project
// `bundled-catalog` depends on the output of task `build`, so Gradle mistakenly concludes that it also
// depends on the task `copyDepends`, and errors occur.
// The same goes for `catalog-lakehouse-iceberg`, `catalog-jdbc-mysql` and `catalog-jdbc-postgresql`.
into("build/libs_all")
}

val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/hive/libs")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.datastrato.gravitino.Namespace;
import com.datastrato.gravitino.catalog.CatalogOperations;
import com.datastrato.gravitino.catalog.PropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.catalog.hive.converter.ToHiveType;
import com.datastrato.gravitino.exceptions.NoSuchCatalogException;
import com.datastrato.gravitino.exceptions.NoSuchSchemaException;
Expand Down Expand Up @@ -52,7 +53,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.SERDE_NAME;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.SERDE_PARAMETER_PREFIX;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.rel.expressions.transforms.Transforms.identity;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;

import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.catalog.hive.converter.FromHiveType;
import com.datastrato.gravitino.catalog.hive.converter.ToHiveType;
import com.datastrato.gravitino.catalog.rel.BaseTable;
Expand Down Expand Up @@ -45,7 +46,6 @@
import lombok.ToString;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import static com.datastrato.gravitino.catalog.PropertyEntry.stringImmutablePropertyEntry;
import static com.datastrato.gravitino.catalog.PropertyEntry.stringReservedPropertyEntry;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.StorageFormat.TEXTFILE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;

import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
import com.datastrato.gravitino.catalog.PropertyEntry;
Expand All @@ -19,7 +19,6 @@
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.TableType;

public class HiveTablePropertiesMetadata extends BasePropertiesMetadata {
public static final String COMMENT = "comment";
Expand Down Expand Up @@ -82,6 +81,14 @@ public class HiveTablePropertiesMetadata extends BasePropertiesMetadata {

private static final String REGEX_SERDE_CLASS = "org.apache.hadoop.hive.serde2.RegexSerDe";

/**
 * Table types that can be named by the table-type property of a Hive table.
 *
 * <p>NOTE(review): this appears to stand in for {@code
 * org.apache.hadoop.hive.metastore.TableType}, whose imports are replaced by this enum in
 * this change; confirm the constant names stay aligned with the Hive version in use.
 */
public enum TableType {
  MANAGED_TABLE,
  EXTERNAL_TABLE,
  VIRTUAL_VIEW,
  INDEX_TABLE,
  VIRTUAL_INDEX;
}

enum StorageFormat {
SEQUENCEFILE(
SEQUENCEFILE_INPUT_FORMAT_CLASS, SEQUENCEFILE_OUTPUT_FORMAT_CLASS, LAZY_SIMPLE_SERDE_CLASS),
Expand Down
4 changes: 2 additions & 2 deletions catalogs/catalog-jdbc-mysql/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
into("build/libs_all")
}
val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/jdbc-mysql/libs")
}

Expand Down
4 changes: 2 additions & 2 deletions catalogs/catalog-jdbc-postgresql/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
into("build/libs_all")
}
val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/jdbc-postgresql/libs")
}

Expand Down
4 changes: 2 additions & 2 deletions catalogs/catalog-lakehouse-iceberg/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ dependencies {
tasks {
val copyDepends by registering(Copy::class) {
from(configurations.runtimeClasspath)
into("build/libs")
into("build/libs_all")
}
val copyCatalogLibs by registering(Copy::class) {
dependsOn(copyDepends, "build")
from("build/libs")
from("build/libs_all", "build/libs")
into("$rootDir/distribution/package/catalogs/lakehouse-iceberg/libs")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,4 +304,19 @@ public static <T extends Enum<T>> PropertyEntry<T> enumImmutablePropertyEntry(
return enumPropertyEntry(
name, description, required, true, javaType, defaultValue, hidden, reserved);
}

/**
 * Returns a string that lists every field of this entry in the form {@code
 * PropertyEntry{name='...', description='...', required=..., ...}}.
 *
 * @return the textual representation of this property entry
 */
@Override
public String toString() {
  // name and description are wrapped in single quotes; the remaining fields are printed bare.
  return "PropertyEntry{"
      + "name='" + name + '\''
      + ", description='" + description + '\''
      + ", required=" + required
      + ", immutable=" + immutable
      + ", javaType=" + javaType
      + ", defaultValue=" + defaultValue
      + ", hidden=" + hidden
      + ", reserved=" + reserved
      + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TEXT_INPUT_FORMAT_CLASS;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TOTAL_SIZE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TRANSIENT_LAST_DDL_TIME;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;
import static org.apache.hadoop.hive.serde.serdeConstants.DATE_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.INT_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
Expand All @@ -35,6 +35,7 @@
import com.datastrato.gravitino.catalog.hive.HiveClientPool;
import com.datastrato.gravitino.catalog.hive.HiveSchemaPropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.client.GravitinoMetaLake;
import com.datastrato.gravitino.dto.rel.ColumnDTO;
import com.datastrato.gravitino.dto.rel.expressions.FieldReferenceDTO;
Expand Down Expand Up @@ -77,7 +78,6 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
Expand Down
2 changes: 1 addition & 1 deletion settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ plugins {
rootProject.name = "gravitino"

include("api", "common", "core", "meta", "server", "integration-test", "server-common")
include("catalogs:catalog-hive", "catalogs:catalog-lakehouse-iceberg", "catalogs:catalog-jdbc-common", "catalogs:catalog-jdbc-mysql", "catalogs:catalog-jdbc-postgresql")
include("catalogs:bundled-catalog", "catalogs:catalog-hive", "catalogs:catalog-lakehouse-iceberg", "catalogs:catalog-jdbc-common", "catalogs:catalog-jdbc-mysql", "catalogs:catalog-jdbc-postgresql")
include("clients:client-java", "clients:client-java-runtime")
include("trino-connector")
include("web")
Expand Down
1 change: 1 addition & 0 deletions trino-connector/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ repositories {

dependencies {
implementation(project(":clients:client-java-runtime", configuration = "shadow"))
implementation(project(":catalogs:bundled-catalog", configuration = "shadow"))
implementation(libs.jackson.databind)
implementation(libs.jackson.annotations)
implementation(libs.guava)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@

import com.datastrato.gravitino.shaded.org.apache.commons.collections4.bidimap.TreeBidiMap;
import com.datastrato.gravitino.trino.connector.catalog.PropertyConverter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;

public class HiveTablePropertyConverter extends PropertyConverter {

// Trino property key does not allow upper case character and '-', so we need to map it to
// Gravitino
private static final TreeBidiMap<String, String> TRINO_KEY_TO_GRAVITINO_KEY =
@VisibleForTesting
static final TreeBidiMap<String, String> TRINO_KEY_TO_GRAVITINO_KEY =
new TreeBidiMap<>(
new ImmutableMap.Builder<String, String>()
.put("format", "format")
Expand Down
Loading

0 comments on commit 04c87b8

Please sign in to comment.