From a3c431474a436fbddf2cc0ba0f9603ead29f4f41 Mon Sep 17 00:00:00 2001 From: Dmitry Kisler Date: Sun, 17 May 2020 03:48:04 +0200 Subject: [PATCH] v1.2.0 - adds record support --- gbqschema_converter/__init__.py | 2 +- gbqschema_converter/__main__.py | 8 +- .../gbqschema_to_jsonschema.py | 108 ++++++++++------- .../jsonschema_to_gbqschema.py | 22 ++-- setup.py | 4 +- tests/test_gbqschema_to_jsonschema.py | 110 ++++++++++++++++-- tests/test_jsonschema_to_gbqschema.py | 104 ++++++++++++++++- 7 files changed, 284 insertions(+), 74 deletions(-) diff --git a/gbqschema_converter/__init__.py b/gbqschema_converter/__init__.py index 646808c..85633cd 100644 --- a/gbqschema_converter/__init__.py +++ b/gbqschema_converter/__init__.py @@ -9,5 +9,5 @@ - https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file - https://json-schema.org/ """ -__version__ = "1.1.0" +__version__ = "1.2.0" __all__ = ['__version__', 'gbqschema_to_jsonschema', 'jsonschema_to_gbqschema'] diff --git a/gbqschema_converter/__main__.py b/gbqschema_converter/__main__.py index 6746f2c..4b05f11 100644 --- a/gbqschema_converter/__main__.py +++ b/gbqschema_converter/__main__.py @@ -18,12 +18,8 @@ logs = logging.getLogger(help_string) -def get_args(): - """CL input parameter. - - Returns: - - """ +def get_args() -> argparse.Namespace: + """CL input parameters.""" parser = argparse.ArgumentParser(description=help_string) required_either = parser.add_mutually_exclusive_group(required=True) required_either.add_argument('-i', '--input', diff --git a/gbqschema_converter/gbqschema_to_jsonschema.py b/gbqschema_converter/gbqschema_to_jsonschema.py index 7e79b44..f860fe2 100644 --- a/gbqschema_converter/gbqschema_to_jsonschema.py +++ b/gbqschema_converter/gbqschema_to_jsonschema.py @@ -47,6 +47,7 @@ "DATETIME", "TIME", "TIMESTAMP", + "RECORD", ], }, "mode": { @@ -64,7 +65,7 @@ ], }, }, - "additionalProperties": False, + "additionalProperties": True, }, } @@ -77,15 +78,7 @@ "$ref": "#/definitions/element" }, "definitions": { - "element": { - "type": "object", - "properties": { - - }, - "additionalProperties": False, - "required": [ - ], - }, + "element": {}, }, } @@ -112,7 +105,8 @@ "type": "string", "pattern": "^((|[0-1])[0-9]|2[0-3]):((|[0-5])[0-9]):((|[0-5])[0-9])(|.[0-9]{1,6})$" }, - TIMESTAMP={"type": "string", "format": "date-time"} + TIMESTAMP={"type": "string", "format": "date-time"}, + RECORD={"type": "object"}, ) @@ -136,26 +130,42 @@ def json_representation(gbq_schema: dict, fastjsonschema.JsonSchemaException: Error occured if input Google BigQuery schema is invalid. """ - try: - validate_json(gbq_schema) - except fastjsonschema.JsonSchemaException as ex: - raise ex - output = deepcopy(TEMPLATE) - - for element in gbq_schema: - key = element['name'] - - output['definitions']['element']['properties'][key] = getattr(map_types, - element['type']) - - if 'description' in element: - if element['description']: - output['definitions']['element']['properties'][key]['description'] = element['description'] - - if 'mode' in element: - if element['mode'] == "REQUIRED": - output['definitions']['element']['required'].append(key) + + def _converter(gbq_schema: dict) -> dict: + """Conversion step.""" + try: + validate_json(gbq_schema) + except fastjsonschema.JsonSchemaException as ex: + raise ex + + output = { + "type": "object", + "properties": { + }, + "additionalProperties": False, + "required": [ + ], + } + + for element in gbq_schema: + key = element['name'] + + output['properties'][key] = getattr(map_types, element['type']) + + if 'mode' in element: + if element['mode'] == "REQUIRED": + output['required'].append(key) + + if element['type'] == "RECORD": + output['properties'][key] = _converter(element['fields']) + + if not output['required']: + _ = output.pop('required') + + return output + + output['definitions']['element'] = _converter(gbq_schema) output['definitions']['element']['additionalProperties'] = additional_properties @@ -180,17 +190,33 @@ def sdk_representation(gbq_schema: List[SchemaField], """ output = deepcopy(TEMPLATE) - for element in gbq_schema: - key = element.name - - output['definitions']['element']['properties'][key] = getattr(map_types, - element.field_type) - - if element.description: - output['definitions']['element']['properties'][key]['description'] = element.description - - if element.mode == "REQUIRED": - output['definitions']['element']['required'].append(key) + def _converter(gbq_schema: dict) -> dict: + output = { + "type": "object", + "properties": { + }, + "additionalProperties": False, + "required": [ + ], + } + + for element in gbq_schema: + key = element.name + + output['properties'][key] = getattr(map_types, element.field_type) + + if element.mode == "REQUIRED": + output['required'].append(key) + + if element.field_type == "RECORD": + output['properties'][key] = _converter(element.fields) + + if not output['required']: + _ = output.pop('required') + + return output + + output['definitions']['element'] = _converter(gbq_schema) output['definitions']['element']['additionalProperties'] = additional_properties diff --git a/gbqschema_converter/jsonschema_to_gbqschema.py b/gbqschema_converter/jsonschema_to_gbqschema.py index e37805a..04a10d8 100644 --- a/gbqschema_converter/jsonschema_to_gbqschema.py +++ b/gbqschema_converter/jsonschema_to_gbqschema.py @@ -9,14 +9,15 @@ MapTypes = namedtuple("map_types", - ['integer', 'number', 'boolean', 'string', 'date']) + ['integer', 'number', 'boolean', 'string', 'date', 'object']) map_types = MapTypes( - integer="INTEGER", - number="NUMERIC", + integer="INT64", + number="FLOAT64", boolean="BOOLEAN", string="STRING", - date="DATE" + date="DATE", + object="RECORD" ) TEMPLATE_GBQ_COLUMN = { @@ -82,10 +83,7 @@ def __gbq_columns(properties: dict, else: gbq_column['type'] = "TIMESTAMP" if v['format'] == "date-time"\ else getattr(map_types, v['format']) if v['format'] in map_types.__dir__()\ - else "STRING" - - if to_sdk_schema: - gbq_column['field_type'] = gbq_column.pop('type') + else "STRING" if required: if k in required: @@ -96,7 +94,12 @@ def __gbq_columns(properties: dict, else: _ = gbq_column.pop('description') + if gbq_column['type'] == "RECORD": + gbq_column['fields'] = __gbq_columns(v['properties'], + v['required']) + if to_sdk_schema: + gbq_column['field_type'] = gbq_column.pop('type') gbq_column = SchemaField(**gbq_column) output.append(gbq_column) @@ -104,8 +107,7 @@ def __gbq_columns(properties: dict, output = [] - if 'definitions' in json_schema\ - or 'items' in json_schema: + if 'definitions' in json_schema: for prop in json_schema['definitions'].values(): properties = prop['properties'] required = prop['required'] if 'required' in prop else None diff --git a/setup.py b/setup.py index 9deac6e..ffa5fb2 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name='gbqschema_converter', - version='1.1.0', + version='1.2.0', description="Library to convert Google BigQuery Table Schema into Json Schema", long_description=README, long_description_content_type="text/markdown", @@ -21,7 +21,7 @@ author_email="admin@dkisler.com", license='MIT', classifiers=[ - "Development Status :: 3 - Alpha", + "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", diff --git a/tests/test_gbqschema_to_jsonschema.py b/tests/test_gbqschema_to_jsonschema.py index 32d84f8..607ddcc 100644 --- a/tests/test_gbqschema_to_jsonschema.py +++ b/tests/test_gbqschema_to_jsonschema.py @@ -2,7 +2,6 @@ # www.dkisler.com import pathlib -import pytest import importlib.util from types import ModuleType from google.cloud.bigquery import SchemaField @@ -113,11 +112,9 @@ def test_json_validator() -> None: "properties": { "att_01": { "type": "integer", - "description": "Att 1" }, "att_02": { "type": "number", - "description": "Att 2" }, "att_03": { "type": "number" @@ -184,13 +181,11 @@ def test_json_validator() -> None: def test_json_representation_conversion() -> None: schema_in = [ { - "description": "Att 1", "name": "att_01", "type": "INT64", "mode": "NULLABLE" }, { - "description": "Att 2", "name": "att_02", "type": "FLOAT64", "mode": "REQUIRED" @@ -258,7 +253,7 @@ def test_json_representation_conversion() -> None: ] schema_convert = module.json_representation(schema_in, True) - + assert schema_convert == schema_out,\ "Convertion doesn't work" @@ -267,8 +262,8 @@ def test_json_representation_conversion() -> None: def test_sdk_representation_conversion() -> None: schema_in = [ - SchemaField('att_01', 'INT64', 'NULLABLE', 'Att 1', ()), - SchemaField('att_02', 'FLOAT64', 'REQUIRED', 'Att 2', ()), + SchemaField('att_01', 'INT64', 'NULLABLE', None, ()), + SchemaField('att_02', 'FLOAT64', 'REQUIRED', None, ()), SchemaField('att_03', 'NUMERIC', 'REQUIRED', None, ()), SchemaField('att_04', 'STRING', 'REQUIRED', None, ()), SchemaField('att_05', 'BOOL', 'REQUIRED', None, ()), @@ -289,3 +284,102 @@ def test_sdk_representation_conversion() -> None: "Convertion doesn't work" return + + +schema_out_record = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "array", + "items": { + "$ref": "#/definitions/element", + }, + "definitions": { + "element": { + "type": "object", + "properties": { + "att_01": { + "type": "integer", + }, + "att_02": { + "type": "object", + "properties": { + "att_11": { + "type": "number", + }, + "att_12": { + "type": "string", + }, + }, + "additionalProperties": False, + "required": [ + "att_11", + ], + }, + }, + "additionalProperties": False, + "required": [ + "att_01", + ], + }, + }, +} + + +def test_json_representation_conversion_record() -> None: + schema_in = [ + { + "name": "att_01", + "type": "INT64", + "mode": "REQUIRED" + }, + { + "name": "att_02", + "type": "RECORD", + "mode": "NULLABLE", + "fields": [ + { + "name": "att_11", + "type": "FLOAT64", + "mode": "REQUIRED", + }, + { + "name": "att_12", + "type": "STRING", + "mode": "NULLABLE", + }, + ], + }, + ] + + schema_convert = module.json_representation(schema_in) + print(schema_convert) + assert schema_convert == schema_out_record,\ + "Convertion doesn't work" + + return + + +def test_sdk_representation_conversion_record() -> None: + schema_in = [ + SchemaField('att_01', 'INT64', 'REQUIRED', None, ()), + SchemaField('att_02', 'RECORD', 'NULLABLE', None, ( + SchemaField('att_11', 'FLOAT64', 'REQUIRED', None, ()), + SchemaField('att_12', 'STRING', 'NULLABLE', None, ())) + ) + ] + + schema_convert = module.sdk_representation(schema_in) + + assert schema_convert == schema_out_record,\ + "Convertion doesn't work" + + return + + +if __name__ == "__main__": + test_module_exists() + test_module_miss_functions() + test_json_validator() + test_json_representation_conversion() + test_sdk_representation_conversion() + test_json_representation_conversion_record() + test_sdk_representation_conversion_record() diff --git a/tests/test_jsonschema_to_gbqschema.py b/tests/test_jsonschema_to_gbqschema.py index c08157f..3c44aaa 100644 --- a/tests/test_jsonschema_to_gbqschema.py +++ b/tests/test_jsonschema_to_gbqschema.py @@ -2,7 +2,6 @@ # www.dkisler.com import pathlib -import pytest import importlib.util from types import ModuleType from google.cloud.bigquery import SchemaField @@ -73,7 +72,7 @@ def test_json_validator() -> None: except Exception as ex: assert "Unknown type: 'array1'" in str(ex),\ "Input validation doesn't work" - + schema_in = { "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", @@ -153,13 +152,13 @@ def test_json_representation_conversion() -> None: { "description": "Att 1", "name": "att_01", - "type": "INTEGER", + "type": "INT64", "mode": "NULLABLE" }, { "description": "Att 2", "name": "att_02", - "type": "NUMERIC", + "type": "FLOAT64", "mode": "REQUIRED" }, { @@ -199,8 +198,8 @@ def test_json_representation_conversion() -> None: def test_sdk_representation_conversion() -> None: schema_out = [ - SchemaField('att_01', 'INTEGER', 'NULLABLE', 'Att 1', ()), - SchemaField('att_02', 'NUMERIC', 'REQUIRED', 'Att 2', ()), + SchemaField('att_01', 'INT64', 'NULLABLE', 'Att 1', ()), + SchemaField('att_02', 'FLOAT64', 'REQUIRED', 'Att 2', ()), SchemaField('att_03', 'STRING', 'REQUIRED', None, ()), SchemaField('att_04', 'BOOLEAN', 'REQUIRED', None, ()), SchemaField('att_05', 'DATE', 'REQUIRED', None, ()), @@ -214,3 +213,96 @@ def test_sdk_representation_conversion() -> None: "Convertion doesn't work" return + + +schema_in_record = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "array", + "items": { + "$ref": "#/definitions/element", + }, + "definitions": { + "element": { + "type": "object", + "properties": { + "att_01": { + "type": "integer", + "description": "Att 1", + }, + "att_02": { + "type": "object", + "description": "Att 2", + "properties": { + "att_11": { + "type": "number", + }, + "att_12": { + "type": "string", + }, + }, + "additionalProperties": False, + "required": [ + "att_11", + ], + }, + }, + "additionalProperties": False, + "required": [ + "att_01", + ], + }, + }, +} + + +def test_json_representation_conversion_record() -> None: + schema_out = [ + { + "description": "Att 1", + "name": "att_01", + "type": "INT64", + "mode": "REQUIRED" + }, + { + "description": "Att 2", + "name": "att_02", + "type": "RECORD", + "mode": "NULLABLE", + "fields": [ + { + "name": "att_11", + "type": "FLOAT64", + "mode": "REQUIRED" + }, + { + "name": "att_12", + "type": "STRING", + "mode": "NULLABLE" + } + ] + }, + ] + + schema_convert = module.json_representation(schema_in_record) + + assert schema_convert == schema_out,\ + "Convertion doesn't work" + + return + + +def test_sdk_representation_conversion_record() -> None: + schema_out = [ + SchemaField('att_01', 'INT64', 'REQUIRED', 'Att 1', ()), + SchemaField('att_02', 'RECORD', 'NULLABLE', 'Att 2', ( + SchemaField('att_11', 'FLOAT64', 'REQUIRED', None, ()), + SchemaField('att_12', 'STRING', 'NULLABLE', None, ())) + ) + ] + + schema_convert = module.sdk_representation(schema_in_record) + + assert schema_convert == schema_out,\ + "Convertion doesn't work" + + return