Skip to content

Commit

Permalink
Initial implementation of ICEBERG_TABLE object type, currently only u…
Browse files Browse the repository at this point in the history
…nmanaged tables are supported
  • Loading branch information
littleK0i committed Dec 17, 2024
1 parent 32b93c3 commit 491ca76
Show file tree
Hide file tree
Showing 31 changed files with 514 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ jobs:
run: pip install -e .[dev]

- name: Run pytest
run: test/run_test.sh
run: test/run_test_full.sh
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## [0.38.0] - 2024-12-17

- Introduced initial implementation of `ICEBERG_TABLE` object type. Currently only unmanaged Iceberg tables are supported.
- Added parameters `external_volume` and `catalog` for `SCHEMA` object type, required for Iceberg tables to work.
- Split the `run_test.sh` script into two scripts: `run_test_full.sh` and `run_test_lite.sh`. The Lite version does not run tests that require a complicated setup of external resources. At the moment, it skips Iceberg table tests.
- Added `iceberg_setup.sql` for tests, which helps to prepare the environment for Iceberg table tests.

Managed Iceberg tables will be implemented if we see sufficient interest from users.

## [0.37.4] - 2024-12-06

- Relaxed argument validation for `oauth_snowpark` authenticator.
Expand Down
1 change: 1 addition & 0 deletions snowddl/blueprint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
ForeignKeyBlueprint,
FunctionBlueprint,
HybridTableBlueprint,
IcebergTableBlueprint,
MaterializedViewBlueprint,
MaskingPolicyBlueprint,
NetworkPolicyBlueprint,
Expand Down
17 changes: 15 additions & 2 deletions snowddl/blueprint/blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ class BusinessRoleBlueprint(AbstractBlueprint):
class DatabaseBlueprint(AbstractBlueprint):
full_name: DatabaseIdent
permission_model: Optional[str] = None
is_sandbox: Optional[bool] = None
is_transient: Optional[bool] = None
retention_time: Optional[int] = None
is_sandbox: Optional[bool] = None
owner_database_write: List[IdentPattern] = []
owner_database_read: List[IdentPattern] = []
owner_integration_usage: List[Ident] = []
Expand Down Expand Up @@ -216,6 +216,17 @@ class HybridTableBlueprint(SchemaObjectBlueprint, DependsOnMixin):
indexes: Optional[List[IndexReference]] = None


class IcebergTableBlueprint(SchemaObjectBlueprint):
    """Blueprint for the ICEBERG_TABLE object type.

    Per the changelog for this release, only unmanaged Iceberg tables are
    supported at the moment.
    """

    # Both identifiers are taken by IcebergTableParser from the params of the
    # schema that contains the table, not from the table config itself.
    external_volume: Ident
    catalog: Ident
    # Optional settings read from the table config file. The parser's JSON
    # schema requires exactly one of catalog_table_name, metadata_file_path
    # or base_location to be present.
    catalog_table_name: Optional[str] = None
    catalog_namespace: Optional[str] = None
    metadata_file_path: Optional[str] = None
    base_location: Optional[str] = None
    # Boolean flags; both default to False when omitted from the config.
    replace_invalid_characters: bool = False
    auto_refresh: bool = False


class MaterializedViewBlueprint(SchemaObjectBlueprint):
text: str
columns: Optional[List[ViewColumn]] = None
Expand Down Expand Up @@ -312,9 +323,11 @@ class RowAccessPolicyBlueprint(SchemaObjectBlueprint):
class SchemaBlueprint(AbstractBlueprint):
full_name: SchemaIdent
permission_model: Optional[str] = None
is_sandbox: Optional[bool] = None
is_transient: Optional[bool] = None
retention_time: Optional[int] = None
is_sandbox: Optional[bool] = None
external_volume: Optional[Ident] = None
catalog: Optional[Ident] = None
owner_database_write: List[IdentPattern] = []
owner_database_read: List[IdentPattern] = []
owner_schema_write: List[IdentPattern] = []
Expand Down
12 changes: 10 additions & 2 deletions snowddl/blueprint/object_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ class ObjectType(Enum):
"blueprint_cls": "ExternalTableBlueprint",
}

# Technical object type, used for GRANTs only
# There is no blueprint
EXTERNAL_VOLUME = {
"singular": "EXTERNAL VOLUME",
"plural": "EXTERNAL VOLUMES",
"singular_for_ref": "VOLUME",
"singular_for_grant": "VOLUME",
}

FILE_FORMAT = {
"singular": "FILE FORMAT",
"plural": "FILE FORMATS",
Expand All @@ -121,13 +130,12 @@ class ObjectType(Enum):
"blueprint_cls": "HybridTableBlueprint",
}

# Technical object type, used for GRANTs only
# Currently there is no blueprint
ICEBERG_TABLE = {
"singular": "ICEBERG TABLE",
"plural": "ICEBERG TABLES",
"singular_for_ref": "TABLE",
"is_future_grant_supported": True,
"blueprint_cls": "IcebergTableBlueprint",
}

# Technical object type, used for GRANTs only
Expand Down
3 changes: 3 additions & 0 deletions snowddl/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .file_format import FileFormatParser
from .function import FunctionParser
from .hybrid_table import HybridTableParser
from .iceberg_table import IcebergTableParser
from .materialized_view import MaterializedViewParser
from .masking_policy import MaskingPolicyParser
from .network_policy import NetworkPolicyParser
Expand Down Expand Up @@ -68,6 +69,7 @@
TableParser,
EventTableParser,
HybridTableParser,
IcebergTableParser,
DynamicTableParser,
ExternalTableParser,
StreamParser,
Expand Down Expand Up @@ -103,6 +105,7 @@
TableParser,
EventTableParser,
HybridTableParser,
IcebergTableParser,
DynamicTableParser,
ExternalTableParser,
StreamParser,
Expand Down
77 changes: 77 additions & 0 deletions snowddl/parser/iceberg_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from functools import partial

from snowddl.blueprint import IcebergTableBlueprint, Ident, SchemaObjectIdent
from snowddl.parser.abc_parser import AbstractParser, ParsedFile
from snowddl.parser.schema import schema_json_schema


# fmt: off
# JSON schema used to validate a single "iceberg_table" config file.
iceberg_table_json_schema = {
    "type": "object",
    "properties": {
        "catalog_table_name": {"type": "string"},
        "catalog_namespace": {"type": "string"},
        "metadata_file_path": {"type": "string"},
        "base_location": {"type": "string"},
        "replace_invalid_characters": {"type": "boolean"},
        "auto_refresh": {"type": "boolean"},
        "comment": {"type": "string"},
    },
    # "oneOf" accepts a config only when exactly one of these keys is present.
    "oneOf": [
        {"required": [key]}
        for key in ("catalog_table_name", "metadata_file_path", "base_location")
    ],
    "additionalProperties": False,
}
# fmt: on


class IcebergTableParser(AbstractParser):
    """Parser that turns "iceberg_table" config files into IcebergTableBlueprint objects."""

    def load_blueprints(self):
        """Gather the params of every schema, then parse all iceberg table files.

        The schema params are passed to ``process_table`` because the external
        volume and catalog of an Iceberg table are defined on schema level.
        """
        # Mapping: database name -> schema name -> parsed schema params.
        combined_params = {
            database_name: {
                schema_name: self.parse_single_entity_file(f"{database_name}/{schema_name}/params", schema_json_schema)
                for schema_name in self.get_schema_names_in_database(database_name)
            }
            for database_name in self.get_database_names()
        }

        table_callback = partial(self.process_table, combined_params=combined_params)
        self.parse_schema_object_files("iceberg_table", iceberg_table_json_schema, table_callback)

    def process_table(self, f: ParsedFile, combined_params: dict):
        """Build and register the blueprint for one parsed iceberg table file.

        Raises:
            ValueError: if the schema containing the table does not define
                ``external_volume`` or ``catalog``.
        """
        owning_schema_params = combined_params[f.database][f.schema]

        external_volume_name = owning_schema_params.get("external_volume")
        if not external_volume_name:
            raise ValueError("Iceberg table requires parameter [external_volume] to be defined on schema level")

        catalog_name = owning_schema_params.get("catalog")
        if not catalog_name:
            raise ValueError("Iceberg table requires parameter [catalog] to be defined on schema level")

        table_params = f.params

        blueprint = IcebergTableBlueprint(
            full_name=SchemaObjectIdent(self.env_prefix, f.database, f.schema, f.name),
            external_volume=Ident(external_volume_name),
            catalog=Ident(catalog_name),
            catalog_table_name=table_params.get("catalog_table_name"),
            catalog_namespace=table_params.get("catalog_namespace"),
            metadata_file_path=table_params.get("metadata_file_path"),
            base_location=table_params.get("base_location"),
            replace_invalid_characters=table_params.get("replace_invalid_characters", False),
            auto_refresh=table_params.get("auto_refresh", False),
            comment=table_params.get("comment"),
        )

        self.config.add_blueprint(blueprint)
14 changes: 11 additions & 3 deletions snowddl/parser/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,20 @@
"permission_model": {
"type": "string",
},
"is_sandbox": {
"type": "boolean"
},
"is_transient": {
"type": "boolean"
},
"retention_time": {
"type": "integer"
},
"is_sandbox": {
"type": "boolean"
"external_volume": {
"type": "string"
},
"catalog": {
"type": "string",
},
"owner_database_read": {
"type": "array",
Expand Down Expand Up @@ -112,9 +118,11 @@ def load_blueprints(self):
bp = SchemaBlueprint(
full_name=SchemaIdent(self.env_prefix, database_name, schema_name),
permission_model=schema_permission_model_name,
is_sandbox=combined_params.get("is_sandbox", False),
is_transient=combined_params.get("is_transient", False),
retention_time=combined_params.get("retention_time", None),
is_sandbox=combined_params.get("is_sandbox", False),
external_volume=Ident(schema_params.get("external_volume")) if schema_params.get("external_volume") else None,
catalog=Ident(schema_params.get("catalog")) if schema_params.get("catalog") else None,
owner_database_write=[IdentPattern(p) for p in schema_params.get("owner_database_write", [])],
owner_database_read=[IdentPattern(p) for p in schema_params.get("owner_database_read", [])],
owner_schema_write=[IdentPattern(p) for p in schema_params.get("owner_schema_write", [])],
Expand Down
4 changes: 3 additions & 1 deletion snowddl/resolver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from .foreign_key import ForeignKeyResolver
from .function import FunctionResolver
from .hybrid_table import HybridTableResolver
from .iceberg_table import IcebergTableResolver
from .masking_policy import MaskingPolicyResolver
from .materialized_view import MaterializedViewResolver
from .network_policy import NetworkPolicyResolver
Expand Down Expand Up @@ -72,6 +73,7 @@
TableResolver,
EventTableResolver,
HybridTableResolver,
IcebergTableResolver,
DynamicTableResolver,
ExternalTableResolver,
PrimaryKeyResolver,
Expand Down Expand Up @@ -138,8 +140,8 @@
TableResolver,
EventTableResolver,
HybridTableResolver,
IcebergTableResolver,
DynamicTableResolver,
ExternalTableResolver,
PrimaryKeyResolver,
UniqueKeyResolver,
ForeignKeyResolver,
Expand Down
Loading

0 comments on commit 491ca76

Please sign in to comment.