-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Dmitry Kisler
committed
Apr 7, 2020
1 parent
257c992
commit 946719b
Showing
8 changed files
with
575 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,9 @@ | ||
# gbq-schema_to_json-schema | ||
|
||
Python library to convert Google BigQuery schema to JSON Schema | ||
|
||
## Test | ||
|
||
```bash | ||
pytest -W ignore -vv . | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Dmitry Kisler © 2020 | ||
# www.dkisler.com | ||
|
||
r""" | ||
Author: Dmitry Kisler | ||
Email: admin@dkisler.com | ||
Objective: To convert Google BigQuery table schema to JSON Schema | ||
References: | ||
- https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file | ||
- https://json-schema.org/ | ||
""" | ||
# Version string of this module.
__version__ = "1.0"
# Names exported by a star-import of this module.
__all__ = ('__version__', 'gbqschema_to_jsonschema', 'jsonschema_to_gbqschema')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
# Dmitry Kisler © 2020 | ||
# www.dkisler.com | ||
|
||
from typing import Union, Tuple, List | ||
from collections import namedtuple | ||
from google.cloud import bigquery | ||
import fastjsonschema | ||
|
||
|
||
# Column types accepted in the "type" attribute of a field definition.
_GBQ_FIELD_TYPES = (
    "INT", "INTEGER", "INT64",
    "FLOAT", "FLOAT64", "NUMERIC",
    "BOOL", "BOOLEAN",
    "STRING", "BYTES",
    "DATE", "DATETIME", "TIME", "TIMESTAMP",
)

# Column modes accepted in the "mode" attribute of a field definition.
_GBQ_FIELD_MODES = ("REQUIRED", "NULLABLE")

# JSON schema (draft-07) describing the JSON representation of a table schema:
# an array of field objects, each with a mandatory name, type and mode and an
# optional description; any other attribute is rejected.
gbq_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "array",
    "items": {
        "type": "object",
        "required": ["name", "type", "mode"],
        "properties": {
            "description": {"type": "string"},
            "name": {"type": "string", "examples": ["att1"]},
            "type": {"type": "string", "enum": list(_GBQ_FIELD_TYPES)},
            "mode": {"type": "string", "enum": list(_GBQ_FIELD_MODES)},
        },
        "additionalProperties": False,
    },
}
|
||
# Compile the validator for gbq_schema once, at import time, so that
# representation_json can reuse it on every call.
validate = fastjsonschema.compile(gbq_schema)
|
||
# Skeleton of the generated JSON schema: an array whose items all follow the
# "element" definition. The element's "properties" and "required" start out
# empty and are filled in per input field by representation_json.
TEMPLATE = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "array",
    "items": {"$ref": "#/definitions/element"},
    "definitions": {
        "element": dict(
            type="object",
            properties={},
            additionalProperties=False,
            required=[],
        ),
    },
}
|
||
# Record type with one attribute per column type. The attribute names are
# taken from the "type" enum of gbq_schema, so a type accepted by the
# validator but missing below fails at import time instead of at lookup time.
MapTypes = namedtuple("map_types",
                      gbq_schema['items']['properties']['type']['enum'])

# Column type -> JSON schema fragment describing values of that type.
# The fragments are shared objects: copy one before adding keys to it.
map_types = MapTypes(
    INT={"type": "integer"},
    INTEGER={"type": "integer"},
    INT64={"type": "integer"},
    FLOAT={"type": "number"},
    FLOAT64={"type": "number"},
    NUMERIC={"type": "number"},
    BOOL={"type": "boolean"},
    BOOLEAN={"type": "boolean"},
    STRING={"type": "string"},
    BYTES={"type": "string"},
    DATE={"type": "string", "format": "date"},
    DATETIME={"type": "string", "format": "date-time"},
    TIME={"type": "string", "format": "time"},
    # A TIMESTAMP carries a date as well as a time of day, so its format is
    # "date-time"; "time" would reject every complete timestamp value.
    TIMESTAMP={"type": "string", "format": "date-time"},
)
|
||
|
||
def representation_json(gbq_schema: List[dict],
                        additional_properties: bool = False) -> dict:
    """Function to convert Google BigQuery schema in JSON representation to JSON schema.

    Every call returns an independent schema: neither TEMPLATE nor the
    map_types fragments are modified.

    Args:
      gbq_schema: BigQuery schema, JSON representation (a list of field
        definitions, each with "name", "type", "mode" and an optional
        "description")
        read https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file
        for details.
      additional_properties: Value written to "additionalProperties" of the
        generated element definition.

    Returns:
      JSON schema as dict.

    Raises:
      fastjsonschema.JsonSchemaException: Error occurred if input Google BigQuery schema is invalid.
    """
    # Function-scope import keeps the fix self-contained within this block.
    from copy import deepcopy

    # NOTE: the parameter shadows the module-level `gbq_schema` (the schema of
    # the input format); `validate` was compiled from that module-level one.
    # An invalid input propagates as fastjsonschema.JsonSchemaException.
    validate(gbq_schema)

    # Deep copy: a shallow copy would share the nested "properties" dict and
    # "required" list with TEMPLATE, so fields would pile up across calls.
    output = deepcopy(TEMPLATE)
    element_schema = output['definitions']['element']
    element_schema['additionalProperties'] = additional_properties

    for field in gbq_schema:
        key = field['name']

        # Copy the type fragment before extending it; otherwise one field's
        # description would leak to every other field of the same type.
        prop = dict(getattr(map_types, field['type']))
        if 'description' in field:
            prop['description'] = field['description']
        element_schema['properties'][key] = prop

        if field['mode'] == "REQUIRED":
            element_schema['required'].append(key)

    return output
|
||
|
||
def representation_google_sdk(gbq_schema: List[bigquery.SchemaField],
                              restrictive: bool = False) -> dict:
    """Function to convert Google BigQuery schema in Google SDK representation to JSON schema.

    Placeholder: the conversion is not implemented yet, so both arguments are
    ignored and None is returned.

    Args:
      gbq_schema: BigQuery schema, SDK representation
        read https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.schema.SchemaField.html
        for details.
      restrictive: Currently unused.
        NOTE(review): the earlier docstring described an
        `additional_properties` argument ("jsonschema should contain
        additionalProperties") that is not in the signature; presumably
        `restrictive` is meant to control that keyword - confirm.

    Returns:
      Intended to be the JSON schema as dict; None until implemented.
    """
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Dmitry Kisler © 2020 | ||
# www.dkisler.com | ||
|
||
from typing import Union, Tuple, List | ||
from collections import namedtuple | ||
from google.cloud import bigquery | ||
import fastjsonschema | ||
|
||
|
||
def representation_json(json_schema: dict) -> dict:
    """Function to convert JSON schema to Google BigQuery schema in JSON representation.

    Placeholder: only the validity check of the input is implemented; the
    conversion itself is not, so None is returned for a valid input.

    Args:
      json_schema: JSON schema
        read https://json-schema.org/
        for details.

    Returns:
      Intended to be the BigQuery schema in JSON representation; None until
      implemented.

    Raises:
      fastjsonschema.JsonSchemaDefinitionException: Error occurred if input JSON schema is invalid.
    """
    # Compiling serves purely as a validity check: the compiled validator is
    # discarded and a definition error propagates to the caller unchanged.
    fastjsonschema.compile(json_schema)
    return None
|
||
|
||
def representation_google_sdk(json_schema: dict) -> List[bigquery.SchemaField]:
    """Function to convert JSON schema to Google BigQuery schema in Google SDK representation.

    Placeholder: the conversion is not implemented yet, so the argument is
    ignored and None is returned.

    Args:
      json_schema: JSON schema
        read https://json-schema.org/
        for details.

    Returns:
      Intended to be a list of SchemaField objects; None until implemented.
    """
    return None
Oops, something went wrong.