From e16f448c7642ac489acb3670e4c9744c6607baf3 Mon Sep 17 00:00:00 2001 From: Ilyas Gasanov Date: Mon, 25 Nov 2024 08:44:30 +0300 Subject: [PATCH 1/5] [DOP-21442] Add Excel API schema --- docs/changelog/next_release/139.feature.rst | 1 + syncmaster/schemas/v1/__init__.py | 2 +- syncmaster/schemas/v1/file_formats.py | 1 + syncmaster/schemas/v1/transfers/file/base.py | 10 +-- .../schemas/v1/transfers/file_format.py | 13 ++- .../test_create_transfer.py | 79 +++++++++++++++++-- .../test_file_transfers/test_read_transfer.py | 10 +++ .../test_update_transfer.py | 19 +++-- 8 files changed, 116 insertions(+), 19 deletions(-) create mode 100644 docs/changelog/next_release/139.feature.rst diff --git a/docs/changelog/next_release/139.feature.rst b/docs/changelog/next_release/139.feature.rst new file mode 100644 index 00000000..2ac222fb --- /dev/null +++ b/docs/changelog/next_release/139.feature.rst @@ -0,0 +1 @@ +Add Excel API schema \ No newline at end of file diff --git a/syncmaster/schemas/v1/__init__.py b/syncmaster/schemas/v1/__init__.py index aeffd086..6fa935b0 100644 --- a/syncmaster/schemas/v1/__init__.py +++ b/syncmaster/schemas/v1/__init__.py @@ -36,7 +36,7 @@ PostgresReadTransferSourceAndTarget, ReadDBTransfer, ) -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine +from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine from syncmaster.schemas.v1.transfers.run import ( CreateRunSchema, ReadRunSchema, diff --git a/syncmaster/schemas/v1/file_formats.py b/syncmaster/schemas/v1/file_formats.py index a0175e99..ebf3e389 100644 --- a/syncmaster/schemas/v1/file_formats.py +++ b/syncmaster/schemas/v1/file_formats.py @@ -5,3 +5,4 @@ CSV_FORMAT = Literal["csv"] JSONLINE_FORMAT = Literal["jsonline"] JSON_FORMAT = Literal["json"] +EXCEL_FORMAT = Literal["excel"] diff --git a/syncmaster/schemas/v1/transfers/file/base.py b/syncmaster/schemas/v1/transfers/file/base.py index e52808da..3a131a4f 100644 --- a/syncmaster/schemas/v1/transfers/file/base.py +++ b/syncmaster/schemas/v1/transfers/file/base.py @@ -7,20 +7,20 @@ from pydantic import BaseModel, Field, field_validator -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine +from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine # At the moment the ReadTransferSourceParams and ReadTransferTargetParams # classes are identical but may change in the future class ReadFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type") options: dict[str, Any] class ReadFileTransferTarget(BaseModel): directory_path: str - file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON format is not supported for writing + file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON format is not supported for writing options: dict[str, Any] @@ -28,7 +28,7 @@ class ReadFileTransferTarget(BaseModel): # classes are identical but may change in the future class CreateFileTransferSource(BaseModel): directory_path: str - file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") + file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type") options: dict[str, Any] = Field(default_factory=dict) class Config: @@ -44,7 +44,7 @@ def _directory_path_is_valid_path(cls, value): class CreateFileTransferTarget(BaseModel): directory_path: str - file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET ! + file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET ! options: dict[str, Any] = Field(default_factory=dict) class Config: diff --git a/syncmaster/schemas/v1/transfers/file_format.py b/syncmaster/schemas/v1/transfers/file_format.py index cdf8496d..b906cfbd 100644 --- a/syncmaster/schemas/v1/transfers/file_format.py +++ b/syncmaster/schemas/v1/transfers/file_format.py @@ -4,7 +4,12 @@ from pydantic import BaseModel -from syncmaster.schemas.v1.file_formats import CSV_FORMAT, JSON_FORMAT, JSONLINE_FORMAT +from syncmaster.schemas.v1.file_formats import ( + CSV_FORMAT, + EXCEL_FORMAT, + JSON_FORMAT, + JSONLINE_FORMAT, +) class CSV(BaseModel): @@ -27,3 +32,9 @@ class JSON(BaseModel): type: JSON_FORMAT encoding: str = "utf-8" line_sep: str = "\n" + + +class Excel(BaseModel): + type: EXCEL_FORMAT + include_header: bool + start_cell: str | None = None diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index ad8c2c27..b4b6d34b 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -28,6 +28,24 @@ "directory_path": "/some/pure/path", "file_format": { "type": "csv", + "delimiter": ",", + "encoding": "utf-8", + "quote": '"', + "escape": "\\", + "header": False, + "line_sep": "\n", + }, + "options": { + "some": "option", + }, + }, + { + "type": "s3", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", }, "options": { "some": "option", @@ -94,11 +112,26 @@ async def test_developer_plus_can_create_s3_transfer( "queue_id": transfer.queue_id, } + expected_file_formats = { + "csv": { + "delimiter": ",", + "encoding": "utf-8", + "quote": '"', + "escape": "\\", + "header": False, + "line_sep": "\n", + }, + "excel": { + "include_header": True, + "start_cell": "A1", + }, + } + for params in (transfer.source_params, transfer.target_params): - assert params["type"] == "s3" - assert params["directory_path"] == "/some/pure/path" - assert params["file_format"]["type"] == "csv" + assert params["type"] == target_source_params["type"] + assert params["directory_path"] == target_source_params["directory_path"] assert params["options"] == {"some": "option"} + assert params["file_format"] == expected_file_formats[params["type"]] @pytest.mark.parametrize( @@ -121,6 +154,15 @@ async def test_developer_plus_can_create_s3_transfer( "type": "csv", }, }, + { + "type": "hdfs", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + }, ], ) async def test_developer_plus_can_create_hdfs_transfer( @@ -183,10 +225,27 @@ async def test_developer_plus_can_create_hdfs_transfer( "queue_id": transfer.queue_id, } + expected_file_formats = { + "csv": { + "type": "csv", + "delimiter": ",", + "encoding": "utf-8", + "quote": '"', + "escape": "\\", + "header": False, + "line_sep": "\n", + }, + "excel": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + } + for params in (transfer.source_params, transfer.target_params): - assert params["type"] == "hdfs" - assert params["directory_path"] == "/some/pure/path" - assert params["file_format"]["type"] == "csv" + assert params["type"] == target_source_params["type"] + assert params["directory_path"] == target_source_params["directory_path"] + assert params["file_format"] == expected_file_formats[params["file_format"]["type"]] assert params["options"] == {} @@ -211,6 +270,14 @@ async def test_developer_plus_can_create_hdfs_transfer( "type": "csv", }, }, + { + "type": "s3", + "directory_path": "some/path", + "file_format": { + "type": "excel", + "include_header": True, + }, + }, ], ) async def test_cannot_create_file_transfer_with_relative_path( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py index ad68ec29..8424e559 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py @@ -23,6 +23,16 @@ }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize( diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py index cc86107c..90e3c51b 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py @@ -23,6 +23,16 @@ }, "options": {}, }, + { + "type": "s3", + "directory_path": "/some/excel/path", + "file_format": { + "type": "excel", + "include_header": True, + "start_cell": "A1", + }, + "options": {}, + }, ], ) @pytest.mark.parametrize( @@ -54,7 +64,7 @@ async def test_developer_plus_can_update_s3_transfer( "source_params": { "type": "s3", "directory_path": "/some/new/test/directory", - "file_format": {"type": "jsonline"}, + "file_format": create_transfer_data["file_format"], "options": {"some": "option"}, }, }, @@ -65,14 +75,11 @@ async def test_developer_plus_can_update_s3_transfer( source_params.update( { "directory_path": "/some/new/test/directory", - "file_format": { - "encoding": "utf-8", - "line_sep": "\n", - "type": "jsonline", - }, + "file_format": create_transfer_data["file_format"], "options": {"some": "option"}, }, ) + # Assert assert result.status_code == 200 assert result.json() == { From 6086576d521309a42b5f885e4e705fddeef75237 Mon Sep 17 00:00:00 2001 From: Ilyas Gasanov Date: Mon, 25 Nov 2024 08:49:40 +0300 Subject: [PATCH 2/5] [DOP-21442] Add Excel API schema --- docs/changelog/next_release/{139.feature.rst => 140.feature.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/changelog/next_release/{139.feature.rst => 140.feature.rst} (100%) diff --git a/docs/changelog/next_release/139.feature.rst b/docs/changelog/next_release/140.feature.rst similarity index 100% rename from docs/changelog/next_release/139.feature.rst rename to docs/changelog/next_release/140.feature.rst From 84542a98a2486160242253b568a6420355ad93b5 Mon Sep 17 00:00:00 2001 From: Ilyas Gasanov Date: Mon, 25 Nov 2024 09:10:19 +0300 Subject: [PATCH 3/5] [DOP-21442] Add Excel API schema --- tests/test_unit/test_transfers/test_create_transfer.py | 8 ++++++-- .../test_file_transfers/test_create_transfer.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_unit/test_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_create_transfer.py index 35b98ea4..3826adde 100644 --- a/tests/test_unit/test_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_create_transfer.py @@ -633,10 +633,14 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json( "message": "Invalid request", "details": [ { - "context": {"discriminator": "'type'", "tag": "json", "expected_tags": "'csv', 'jsonline'"}, + "context": { + "discriminator": "'type'", + "tag": "json", + "expected_tags": "'csv', 'jsonline', 'excel'", + }, "input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"}, "location": ["body", "target_params", "s3", "file_format"], - "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline'", + "message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel'", "code": "union_tag_invalid", }, ], diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index b4b6d34b..df3eb987 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -114,6 +114,7 @@ async def test_developer_plus_can_create_s3_transfer( expected_file_formats = { "csv": { + "type": "csv", "delimiter": ",", "encoding": "utf-8", "quote": '"', @@ -122,6 +123,7 @@ async def test_developer_plus_can_create_s3_transfer( "line_sep": "\n", }, "excel": { + "type": "excel", "include_header": True, "start_cell": "A1", }, @@ -131,7 +133,7 @@ async def test_developer_plus_can_create_s3_transfer( assert params["type"] == target_source_params["type"] assert params["directory_path"] == target_source_params["directory_path"] assert params["options"] == {"some": "option"} - assert params["file_format"] == expected_file_formats[params["type"]] + assert params["file_format"] == expected_file_formats[params["file_format"]["type"]] @pytest.mark.parametrize( From 83c62a7e326a33ed473a60c820b8adc1ebfa61cc Mon Sep 17 00:00:00 2001 From: Ilyas Gasanov Date: Mon, 25 Nov 2024 11:58:04 +0300 Subject: [PATCH 4/5] [DOP-21442] Add Excel API schema --- syncmaster/schemas/v1/transfers/file_format.py | 4 ++-- .../file_df_connection/generate_files.py | 18 +++++++++--------- .../test_run_transfer/conftest.py | 4 ++-- .../test_create_transfer.py | 6 +++--- .../test_file_transfers/test_read_transfer.py | 2 +- .../test_update_transfer.py | 2 +- .../transfer_fixtures/transfers_fixture.py | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/syncmaster/schemas/v1/transfers/file_format.py b/syncmaster/schemas/v1/transfers/file_format.py index b906cfbd..2c91ef8d 100644 --- a/syncmaster/schemas/v1/transfers/file_format.py +++ b/syncmaster/schemas/v1/transfers/file_format.py @@ -18,7 +18,7 @@ class CSV(BaseModel): encoding: str = "utf-8" quote: str = '"' escape: str = "\\" - header: bool = False + include_header: bool = False line_sep: str = "\n" @@ -36,5 +36,5 @@ class JSON(BaseModel): class Excel(BaseModel): type: EXCEL_FORMAT - include_header: bool + include_header: bool = False start_cell: str | None = None diff --git a/tests/resources/file_df_connection/generate_files.py b/tests/resources/file_df_connection/generate_files.py index b4782d6c..12d2654a 100644 --- a/tests/resources/file_df_connection/generate_files.py +++ b/tests/resources/file_df_connection/generate_files.py @@ -103,11 +103,11 @@ def _to_string(obj): return obj -def _write_csv(data: list[dict], file: TextIO, header: bool = False, **kwargs) -> None: +def _write_csv(data: list[dict], file: TextIO, include_header: bool = False, **kwargs) -> None: columns = list(data[0].keys()) writer = csv.DictWriter(file, fieldnames=columns, lineterminator="\n", **kwargs) - if header: + if include_header: writer.writeheader() for row in data: @@ -123,7 +123,7 @@ def save_as_csv_without_header(data: list[dict], path: Path) -> None: def save_as_csv_with_header(data: list[dict], path: Path) -> None: path.mkdir(parents=True, exist_ok=True) with open(path / "file.csv", "w", newline="") as file: - _write_csv(data, file, header=True) + _write_csv(data, file, include_header=True) def save_as_csv_with_delimiter(data: list[dict], path: Path) -> None: @@ -403,12 +403,12 @@ def save_as_xlsx(data: list[dict], path: Path) -> None: shutil.rmtree(root, ignore_errors=True) root.mkdir(parents=True, exist_ok=True) - save_as_xlsx_with_options(data, root / "without_header", header=False) - save_as_xlsx_with_options(data, root / "with_header", header=True) + save_as_xlsx_with_options(data, root / "without_header", include_header=False) + save_as_xlsx_with_options(data, root / "with_header", include_header=True) save_as_xlsx_with_options( data, root / "with_data_address", - header=False, + include_header=False, sheet_name="ABC", startcol=10, startrow=5, @@ -420,12 +420,12 @@ def save_as_xls(data: list[dict], path: Path) -> None: shutil.rmtree(root, ignore_errors=True) root.mkdir(parents=True, exist_ok=True) - save_as_xls_with_options(data, root / "without_header", header=False) - save_as_xls_with_options(data, root / "with_header", header=True) + save_as_xls_with_options(data, root / "without_header", include_header=False) + save_as_xls_with_options(data, root / "with_header", include_header=True) save_as_xls_with_options( data, root / "with_data_address", - header=False, + include_header=False, sheet_name="ABC", startcol=10, startrow=5, diff --git a/tests/test_integration/test_run_transfer/conftest.py b/tests/test_integration/test_run_transfer/conftest.py index 5927dd40..5daf31b7 100644 --- a/tests/test_integration/test_run_transfer/conftest.py +++ b/tests/test_integration/test_run_transfer/conftest.py @@ -515,7 +515,7 @@ def source_file_format(request: FixtureRequest): if name == "csv": return "csv", CSV( lineSep="\n", - header=True, + include_header=True, **params, ) @@ -542,7 +542,7 @@ def target_file_format(request: FixtureRequest): if name == "csv": return "csv", CSV( lineSep="\n", - header=True, + include_header=True, timestampFormat="yyyy-MM-dd'T'HH:mm:ss.SSSSSS+00:00", **params, ) diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index df3eb987..f60d8782 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -32,7 +32,7 @@ "encoding": "utf-8", "quote": '"', "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", }, "options": { @@ -119,7 +119,7 @@ async def test_developer_plus_can_create_s3_transfer( "encoding": "utf-8", "quote": '"', "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", }, "excel": { @@ -234,7 +234,7 @@ async def test_developer_plus_can_create_hdfs_transfer( "encoding": "utf-8", "quote": '"', "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", }, "excel": { diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py index 8424e559..bb5e9e3a 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py @@ -16,7 +16,7 @@ "delimiter": ",", "encoding": "utf-8", "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", "quote": '"', "type": "csv", diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py index 90e3c51b..182bab00 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py @@ -16,7 +16,7 @@ "delimiter": ",", "encoding": "utf-8", "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", "quote": '"', "type": "csv", diff --git a/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py b/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py index 55f37e4f..39568e5c 100644 --- a/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py +++ b/tests/test_unit/test_transfers/transfer_fixtures/transfers_fixture.py @@ -57,7 +57,7 @@ async def group_transfers( "delimiter": ",", "encoding": "utf-8", "escape": "\\", - "header": False, + "include_header": False, "line_sep": "\n", "quote": '"', "type": "csv", From 98d4648092edc3132a00dda8bfb4cecc8091cf1a Mon Sep 17 00:00:00 2001 From: Ilyas Gasanov Date: Mon, 25 Nov 2024 12:29:46 +0300 Subject: [PATCH 5/5] [DOP-21442] Add Excel API schema --- tests/test_integration/test_run_transfer/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_integration/test_run_transfer/conftest.py b/tests/test_integration/test_run_transfer/conftest.py index 5daf31b7..5927dd40 100644 --- a/tests/test_integration/test_run_transfer/conftest.py +++ b/tests/test_integration/test_run_transfer/conftest.py @@ -515,7 +515,7 @@ def source_file_format(request: FixtureRequest): if name == "csv": return "csv", CSV( lineSep="\n", - include_header=True, + header=True, **params, ) @@ -542,7 +542,7 @@ def target_file_format(request: FixtureRequest): if name == "csv": return "csv", CSV( lineSep="\n", - include_header=True, + header=True, timestampFormat="yyyy-MM-dd'T'HH:mm:ss.SSSSSS+00:00", **params, )