Skip to content

Commit

Permalink
[DOP-21442] Add Excel API schema (#140)
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasDevelopment authored Nov 25, 2024
1 parent 32726c9 commit 455f99e
Show file tree
Hide file tree
Showing 11 changed files with 137 additions and 34 deletions.
1 change: 1 addition & 0 deletions docs/changelog/next_release/140.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Excel API schema
2 changes: 1 addition & 1 deletion syncmaster/schemas/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
PostgresReadTransferSourceAndTarget,
ReadDBTransfer,
)
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine
from syncmaster.schemas.v1.transfers.run import (
CreateRunSchema,
ReadRunSchema,
Expand Down
1 change: 1 addition & 0 deletions syncmaster/schemas/v1/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
CSV_FORMAT = Literal["csv"]
JSONLINE_FORMAT = Literal["jsonline"]
JSON_FORMAT = Literal["json"]
EXCEL_FORMAT = Literal["excel"]
10 changes: 5 additions & 5 deletions syncmaster/schemas/v1/transfers/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,28 @@

from pydantic import BaseModel, Field, field_validator

from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine
from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, Excel, JSONLine


# At the moment ReadFileTransferSource and ReadFileTransferTarget are nearly
# identical (the target does not accept the JSON format) but may diverge further in the future
class ReadFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type")
options: dict[str, Any]


class ReadFileTransferTarget(BaseModel):
directory_path: str
file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON format is not supported for writing
file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON format is not supported for writing
options: dict[str, Any]


# At the moment CreateFileTransferSource and CreateFileTransferTarget are nearly
# identical (the target does not accept the JSON format) but may diverge further in the future
class CreateFileTransferSource(BaseModel):
directory_path: str
file_format: CSV | JSONLine | JSON = Field(..., discriminator="type")
file_format: CSV | JSONLine | JSON | Excel = Field(..., discriminator="type")
options: dict[str, Any] = Field(default_factory=dict)

class Config:
Expand All @@ -44,7 +44,7 @@ def _directory_path_is_valid_path(cls, value):

class CreateFileTransferTarget(BaseModel):
directory_path: str
file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET !
file_format: CSV | JSONLine | Excel = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET !
options: dict[str, Any] = Field(default_factory=dict)

class Config:
Expand Down
15 changes: 13 additions & 2 deletions syncmaster/schemas/v1/transfers/file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

from pydantic import BaseModel

from syncmaster.schemas.v1.file_formats import CSV_FORMAT, JSON_FORMAT, JSONLINE_FORMAT
from syncmaster.schemas.v1.file_formats import (
CSV_FORMAT,
EXCEL_FORMAT,
JSON_FORMAT,
JSONLINE_FORMAT,
)


class CSV(BaseModel):
Expand All @@ -13,7 +18,7 @@ class CSV(BaseModel):
encoding: str = "utf-8"
quote: str = '"'
escape: str = "\\"
header: bool = False
include_header: bool = False
line_sep: str = "\n"


Expand All @@ -27,3 +32,9 @@ class JSON(BaseModel):
type: JSON_FORMAT
encoding: str = "utf-8"
line_sep: str = "\n"


# File-format schema for Excel files. It is one member of the `file_format`
# union used by the file transfer source/target schemas, which select the
# concrete format class via the `type` discriminator field.
class Excel(BaseModel):
# Discriminator tag; EXCEL_FORMAT is Literal["excel"], so only "excel" is accepted.
type: EXCEL_FORMAT
# Defaults to False. Presumably controls whether the first row is treated as
# (or written as) a header row — TODO confirm against the worker implementation.
include_header: bool = False
# Optional, defaults to None. Presumably the cell address where the data range
# starts (tests pass "A1") — no format validation is applied here; TODO confirm.
start_cell: str | None = None
18 changes: 9 additions & 9 deletions tests/resources/file_df_connection/generate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@ def _to_string(obj):
return obj


def _write_csv(data: list[dict], file: TextIO, header: bool = False, **kwargs) -> None:
def _write_csv(data: list[dict], file: TextIO, include_header: bool = False, **kwargs) -> None:
columns = list(data[0].keys())
writer = csv.DictWriter(file, fieldnames=columns, lineterminator="\n", **kwargs)

if header:
if include_header:
writer.writeheader()

for row in data:
Expand All @@ -123,7 +123,7 @@ def save_as_csv_without_header(data: list[dict], path: Path) -> None:
def save_as_csv_with_header(data: list[dict], path: Path) -> None:
path.mkdir(parents=True, exist_ok=True)
with open(path / "file.csv", "w", newline="") as file:
_write_csv(data, file, header=True)
_write_csv(data, file, include_header=True)


def save_as_csv_with_delimiter(data: list[dict], path: Path) -> None:
Expand Down Expand Up @@ -403,12 +403,12 @@ def save_as_xlsx(data: list[dict], path: Path) -> None:
shutil.rmtree(root, ignore_errors=True)
root.mkdir(parents=True, exist_ok=True)

save_as_xlsx_with_options(data, root / "without_header", header=False)
save_as_xlsx_with_options(data, root / "with_header", header=True)
save_as_xlsx_with_options(data, root / "without_header", include_header=False)
save_as_xlsx_with_options(data, root / "with_header", include_header=True)
save_as_xlsx_with_options(
data,
root / "with_data_address",
header=False,
include_header=False,
sheet_name="ABC",
startcol=10,
startrow=5,
Expand All @@ -420,12 +420,12 @@ def save_as_xls(data: list[dict], path: Path) -> None:
shutil.rmtree(root, ignore_errors=True)
root.mkdir(parents=True, exist_ok=True)

save_as_xls_with_options(data, root / "without_header", header=False)
save_as_xls_with_options(data, root / "with_header", header=True)
save_as_xls_with_options(data, root / "without_header", include_header=False)
save_as_xls_with_options(data, root / "with_header", include_header=True)
save_as_xls_with_options(
data,
root / "with_data_address",
header=False,
include_header=False,
sheet_name="ABC",
startcol=10,
startrow=5,
Expand Down
8 changes: 6 additions & 2 deletions tests/test_unit/test_transfers/test_create_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,10 +633,14 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json(
"message": "Invalid request",
"details": [
{
"context": {"discriminator": "'type'", "tag": "json", "expected_tags": "'csv', 'jsonline'"},
"context": {
"discriminator": "'type'",
"tag": "json",
"expected_tags": "'csv', 'jsonline', 'excel'",
},
"input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"},
"location": ["body", "target_params", "s3", "file_format"],
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline'",
"message": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline', 'excel'",
"code": "union_tag_invalid",
},
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@
"directory_path": "/some/pure/path",
"file_format": {
"type": "csv",
"delimiter": ",",
"encoding": "utf-8",
"quote": '"',
"escape": "\\",
"include_header": False,
"line_sep": "\n",
},
"options": {
"some": "option",
},
},
{
"type": "s3",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
"options": {
"some": "option",
Expand Down Expand Up @@ -94,11 +112,28 @@ async def test_developer_plus_can_create_s3_transfer(
"queue_id": transfer.queue_id,
}

expected_file_formats = {
"csv": {
"type": "csv",
"delimiter": ",",
"encoding": "utf-8",
"quote": '"',
"escape": "\\",
"include_header": False,
"line_sep": "\n",
},
"excel": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
}

for params in (transfer.source_params, transfer.target_params):
assert params["type"] == "s3"
assert params["directory_path"] == "/some/pure/path"
assert params["file_format"]["type"] == "csv"
assert params["type"] == target_source_params["type"]
assert params["directory_path"] == target_source_params["directory_path"]
assert params["options"] == {"some": "option"}
assert params["file_format"] == expected_file_formats[params["file_format"]["type"]]


@pytest.mark.parametrize(
Expand All @@ -121,6 +156,15 @@ async def test_developer_plus_can_create_s3_transfer(
"type": "csv",
},
},
{
"type": "hdfs",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
},
],
)
async def test_developer_plus_can_create_hdfs_transfer(
Expand Down Expand Up @@ -183,10 +227,27 @@ async def test_developer_plus_can_create_hdfs_transfer(
"queue_id": transfer.queue_id,
}

expected_file_formats = {
"csv": {
"type": "csv",
"delimiter": ",",
"encoding": "utf-8",
"quote": '"',
"escape": "\\",
"include_header": False,
"line_sep": "\n",
},
"excel": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
}

for params in (transfer.source_params, transfer.target_params):
assert params["type"] == "hdfs"
assert params["directory_path"] == "/some/pure/path"
assert params["file_format"]["type"] == "csv"
assert params["type"] == target_source_params["type"]
assert params["directory_path"] == target_source_params["directory_path"]
assert params["file_format"] == expected_file_formats[params["file_format"]["type"]]
assert params["options"] == {}


Expand All @@ -211,6 +272,14 @@ async def test_developer_plus_can_create_hdfs_transfer(
"type": "csv",
},
},
{
"type": "s3",
"directory_path": "some/path",
"file_format": {
"type": "excel",
"include_header": True,
},
},
],
)
async def test_cannot_create_file_transfer_with_relative_path(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,23 @@
"delimiter": ",",
"encoding": "utf-8",
"escape": "\\",
"header": False,
"include_header": False,
"line_sep": "\n",
"quote": '"',
"type": "csv",
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,23 @@
"delimiter": ",",
"encoding": "utf-8",
"escape": "\\",
"header": False,
"include_header": False,
"line_sep": "\n",
"quote": '"',
"type": "csv",
},
"options": {},
},
{
"type": "s3",
"directory_path": "/some/excel/path",
"file_format": {
"type": "excel",
"include_header": True,
"start_cell": "A1",
},
"options": {},
},
],
)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -54,7 +64,7 @@ async def test_developer_plus_can_update_s3_transfer(
"source_params": {
"type": "s3",
"directory_path": "/some/new/test/directory",
"file_format": {"type": "jsonline"},
"file_format": create_transfer_data["file_format"],
"options": {"some": "option"},
},
},
Expand All @@ -65,14 +75,11 @@ async def test_developer_plus_can_update_s3_transfer(
source_params.update(
{
"directory_path": "/some/new/test/directory",
"file_format": {
"encoding": "utf-8",
"line_sep": "\n",
"type": "jsonline",
},
"file_format": create_transfer_data["file_format"],
"options": {"some": "option"},
},
)

# Assert
assert result.status_code == 200
assert result.json() == {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def group_transfers(
"delimiter": ",",
"encoding": "utf-8",
"escape": "\\",
"header": False,
"include_header": False,
"line_sep": "\n",
"quote": '"',
"type": "csv",
Expand Down

0 comments on commit 455f99e

Please sign in to comment.