
Commit 2d5027c

Author: Patryk Jatczak (committed)
Merge branch 'develop' of https://dev.azure.com/imagetemplates/documati.io/_git/dqoado into feature/10970_DuckDb_connector_suppors_simple_import_of_metadata_by_using_scan_csv_calls_in_the_importTables_operation_in_the_the_duckdb_connector
2 parents: d9b4988 + 15b9af3

945 files changed: +17710 -22231 lines


distribution/python/dqops/client/client.py (+2 -2)
@@ -38,7 +38,7 @@ class Client:
     _base_url: str
     _cookies: Dict[str, str] = field(factory=dict, kw_only=True)
     _headers: Dict[str, str] = field(factory=dict, kw_only=True)
-    _timeout: Optional[httpx.Timeout] = field(default=httpx.Timeout(120.0), kw_only=True)
+    _timeout: Optional[httpx.Timeout] = field(default=None, kw_only=True)
     _verify_ssl: Union[str, bool, ssl.SSLContext] = field(default=True, kw_only=True)
     _follow_redirects: bool = field(default=False, kw_only=True)
     _httpx_args: Dict[str, Any] = field(factory=dict, kw_only=True)
@@ -168,7 +168,7 @@ class AuthenticatedClient:
     _base_url: str
     _cookies: Dict[str, str] = field(factory=dict, kw_only=True)
     _headers: Dict[str, str] = field(factory=dict, kw_only=True)
-    _timeout: Optional[httpx.Timeout] = field(default=httpx.Timeout(120.0), kw_only=True)
+    _timeout: Optional[httpx.Timeout] = field(default=None, kw_only=True)
     _verify_ssl: Union[str, bool, ssl.SSLContext] = field(default=True, kw_only=True)
     _follow_redirects: bool = field(default=False, kw_only=True)
     _httpx_args: Dict[str, Any] = field(factory=dict, kw_only=True)
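The default timeout changes from a hard-coded 120 seconds to None, leaving timeout policy to httpx and the caller. A minimal sketch of pinning the old value explicitly (the base URL and token below are placeholders; attrs strips the leading underscore from _timeout, so the keyword-only init argument is timeout):

import httpx

from dqops.client.client import AuthenticatedClient

# Pin the previous 120-second timeout explicitly instead of relying
# on the new None default.
client = AuthenticatedClient(
    base_url="http://localhost:8888",  # placeholder DQOps instance URL
    token="my-api-key",                # placeholder API key
    timeout=httpx.Timeout(120.0),
)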

distribution/python/dqops/client/models/__init__.py (+6)
@@ -1350,6 +1350,8 @@
 from .dqo_user_role import DqoUserRole
 from .duckdb_parameters_spec import DuckdbParametersSpec
 from .duckdb_parameters_spec_properties import DuckdbParametersSpecProperties
+from .duckdb_read_mode import DuckdbReadMode
+from .duckdb_source_files_type import DuckdbSourceFilesType
 from .duration import Duration
 from .effective_schedule_level_model import EffectiveScheduleLevelModel
 from .effective_schedule_model import EffectiveScheduleModel
@@ -1382,6 +1384,7 @@
 from .incident_webhook_notifications_spec import IncidentWebhookNotificationsSpec
 from .incidents_per_connection_model import IncidentsPerConnectionModel
 from .json_file_format_spec import JsonFileFormatSpec
+from .json_file_format_spec_columns import JsonFileFormatSpecColumns
 from .max_count_rule_0_error_parameters_spec import MaxCountRule0ErrorParametersSpec
 from .max_count_rule_0_warning_parameters_spec import MaxCountRule0WarningParametersSpec
 from .max_count_rule_100_parameters_spec import MaxCountRule100ParametersSpec
@@ -2437,6 +2440,8 @@
     "DqoUserRole",
     "DuckdbParametersSpec",
     "DuckdbParametersSpecProperties",
+    "DuckdbReadMode",
+    "DuckdbSourceFilesType",
     "Duration",
     "EffectiveScheduleLevelModel",
     "EffectiveScheduleModel",
@@ -2469,6 +2474,7 @@
     "IncidentStatus",
     "IncidentWebhookNotificationsSpec",
     "JsonFileFormatSpec",
+    "JsonFileFormatSpecColumns",
     "MaxCountRule0ErrorParametersSpec",
     "MaxCountRule0WarningParametersSpec",
     "MaxCountRule100ParametersSpec",

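These re-exports make the new enum and column-map models importable from the flat models package. For instance, both imports below should resolve to the same class once this commit lands:

# The flat re-export and the defining module expose the same symbol.
from dqops.client.models import DuckdbReadMode
from dqops.client.models.duckdb_read_mode import DuckdbReadMode as _Direct

assert DuckdbReadMode is _Direct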
distribution/python/dqops/client/models/connection_model.py (+18)
@@ -13,6 +13,7 @@
 from ..models.delete_stored_data_queue_job_parameters import (
     DeleteStoredDataQueueJobParameters,
 )
+from ..models.duckdb_parameters_spec import DuckdbParametersSpec
 from ..models.mysql_parameters_spec import MysqlParametersSpec
 from ..models.oracle_parameters_spec import OracleParametersSpec
 from ..models.postgresql_parameters_spec import PostgresqlParametersSpec
@@ -43,6 +44,7 @@ class ConnectionModel:
         bigquery (Union[Unset, BigQueryParametersSpec]):
         snowflake (Union[Unset, SnowflakeParametersSpec]):
         postgresql (Union[Unset, PostgresqlParametersSpec]):
+        duckdb (Union[Unset, DuckdbParametersSpec]):
         redshift (Union[Unset, RedshiftParametersSpec]):
         sqlserver (Union[Unset, SqlServerParametersSpec]):
         presto (Union[Unset, PrestoParametersSpec]):
@@ -79,6 +81,7 @@ class ConnectionModel:
     bigquery: Union[Unset, "BigQueryParametersSpec"] = UNSET
     snowflake: Union[Unset, "SnowflakeParametersSpec"] = UNSET
     postgresql: Union[Unset, "PostgresqlParametersSpec"] = UNSET
+    duckdb: Union[Unset, "DuckdbParametersSpec"] = UNSET
     redshift: Union[Unset, "RedshiftParametersSpec"] = UNSET
     sqlserver: Union[Unset, "SqlServerParametersSpec"] = UNSET
     presto: Union[Unset, "PrestoParametersSpec"] = UNSET
@@ -122,6 +125,10 @@ def to_dict(self) -> Dict[str, Any]:
         if not isinstance(self.postgresql, Unset):
             postgresql = self.postgresql.to_dict()

+        duckdb: Union[Unset, Dict[str, Any]] = UNSET
+        if not isinstance(self.duckdb, Unset):
+            duckdb = self.duckdb.to_dict()
+
         redshift: Union[Unset, Dict[str, Any]] = UNSET
         if not isinstance(self.redshift, Unset):
             redshift = self.redshift.to_dict()
@@ -209,6 +216,8 @@ def to_dict(self) -> Dict[str, Any]:
             field_dict["snowflake"] = snowflake
         if postgresql is not UNSET:
             field_dict["postgresql"] = postgresql
+        if duckdb is not UNSET:
+            field_dict["duckdb"] = duckdb
         if redshift is not UNSET:
             field_dict["redshift"] = redshift
         if sqlserver is not UNSET:
@@ -266,6 +275,7 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
         from ..models.delete_stored_data_queue_job_parameters import (
             DeleteStoredDataQueueJobParameters,
         )
+        from ..models.duckdb_parameters_spec import DuckdbParametersSpec
         from ..models.mysql_parameters_spec import MysqlParametersSpec
         from ..models.oracle_parameters_spec import OracleParametersSpec
         from ..models.postgresql_parameters_spec import PostgresqlParametersSpec
@@ -314,6 +324,13 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
         else:
             postgresql = PostgresqlParametersSpec.from_dict(_postgresql)

+        _duckdb = d.pop("duckdb", UNSET)
+        duckdb: Union[Unset, DuckdbParametersSpec]
+        if isinstance(_duckdb, Unset):
+            duckdb = UNSET
+        else:
+            duckdb = DuckdbParametersSpec.from_dict(_duckdb)
+
         _redshift = d.pop("redshift", UNSET)
         redshift: Union[Unset, RedshiftParametersSpec]
         if isinstance(_redshift, Unset):
@@ -452,6 +469,7 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
             bigquery=bigquery,
             snowflake=snowflake,
             postgresql=postgresql,
+            duckdb=duckdb,
             redshift=redshift,
             sqlserver=sqlserver,
             presto=presto,
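A short, hedged sketch of the new field in use, assuming the remaining ConnectionModel fields keep their UNSET defaults:

from dqops.client.models.connection_model import ConnectionModel
from dqops.client.models.duckdb_parameters_spec import DuckdbParametersSpec

# Round-trip a connection that carries DuckDB parameters through the
# generated to_dict()/from_dict() pair added in this commit.
model = ConnectionModel(duckdb=DuckdbParametersSpec(database="sensor_data"))
payload = model.to_dict()
assert payload["duckdb"] == {"database": "sensor_data"}

restored = ConnectionModel.from_dict(payload)
assert restored.duckdb.database == "sensor_data"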

distribution/python/dqops/client/models/csv_file_format_spec.py (+26 -18)
@@ -14,25 +14,33 @@

 @_attrs_define
 class CsvFileFormatSpec:
-    """
+    r"""
     Attributes:
-        all_varchar (Union[Unset, bool]):
-        allow_quoted_nulls (Union[Unset, bool]):
-        auto_detect (Union[Unset, bool]):
-        columns (Union[Unset, CsvFileFormatSpecColumns]):
-        compression (Union[Unset, str]):
-        dateformat (Union[Unset, str]):
-        decimal_separator (Union[Unset, str]):
-        delim (Union[Unset, str]):
-        escape (Union[Unset, str]):
-        filename (Union[Unset, bool]):
-        header (Union[Unset, bool]):
-        hive_partitioning (Union[Unset, bool]):
-        ignore_errors (Union[Unset, bool]):
-        new_line (Union[Unset, str]):
-        quote (Union[Unset, str]):
-        skip (Union[Unset, int]):
-        timestampformat (Union[Unset, str]):
+        all_varchar (Union[Unset, bool]): Option to skip type detection for CSV parsing and assume all columns to be of
+            type VARCHAR.
+        allow_quoted_nulls (Union[Unset, bool]): Option to allow the conversion of quoted values to NULL values.
+        auto_detect (Union[Unset, bool]): Enables auto detection of CSV parameters.
+        columns (Union[Unset, CsvFileFormatSpecColumns]): A struct that specifies the column names and column types
+            contained within the CSV file (e.g., {'col1': 'INTEGER', 'col2': 'VARCHAR'}). Using this option implies that
+            auto detection is not used.
+        compression (Union[Unset, str]): The compression type for the file. By default this will be detected
+            automatically from the file extension (e.g., t.csv.gz will use gzip, t.csv will use none). Options are none,
+            gzip, zstd.
+        dateformat (Union[Unset, str]): Specifies the date format to use when parsing dates.
+        decimal_separator (Union[Unset, str]): The decimal separator of numbers.
+        delim (Union[Unset, str]): Specifies the string that separates columns within each row (line) of the file.
+        escape (Union[Unset, str]): Specifies the string that should appear before a data character sequence that
+            matches the quote value.
+        filename (Union[Unset, bool]): Whether or not an extra filename column should be included in the result.
+        header (Union[Unset, bool]): Specifies that the file contains a header line with the names of each column in the
+            file.
+        hive_partitioning (Union[Unset, bool]): Whether or not to interpret the path as a hive partitioned path.
+        ignore_errors (Union[Unset, bool]): Option to ignore any parsing errors encountered - and instead ignore rows
+            with errors.
+        new_line (Union[Unset, str]): Set the new line character(s) in the file. Options are '\r','\n', or '\r\n'.
+        quote (Union[Unset, str]): Specifies the quoting string to be used when a data value is quoted.
+        skip (Union[Unset, int]): The number of lines at the top of the file to skip.
+        timestampformat (Union[Unset, str]): Specifies the date format to use when parsing timestamps.
     """

     all_varchar: Union[Unset, bool] = UNSET
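The attribute descriptions above track DuckDB's CSV reader options. A hedged construction sketch; only explicitly set fields survive serialization, since UNSET values are skipped by to_dict():

from dqops.client.models.csv_file_format_spec import CsvFileFormatSpec

# Describe a gzip-compressed, headered CSV; everything left unset
# stays UNSET and is omitted from the serialized dictionary.
csv_format = CsvFileFormatSpec(header=True, delim=",", compression="gzip")
assert csv_format.to_dict() == {"header": True, "delim": ",", "compression": "gzip"}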

distribution/python/dqops/client/models/csv_file_format_spec_columns.py (+4 -1)
@@ -8,7 +8,10 @@

 @_attrs_define
 class CsvFileFormatSpecColumns:
-    """ """
+    """A struct that specifies the column names and column types contained within the CSV file (e.g., {'col1': 'INTEGER',
+    'col2': 'VARCHAR'}). Using this option implies that auto detection is not used.
+
+    """

     additional_properties: Dict[str, str] = _attrs_field(init=False, factory=dict)
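CsvFileFormatSpecColumns is a free-form string-to-string map backed by additional_properties; generated models of this shape support dict-style item access, so a usage sketch might be:

from dqops.client.models.csv_file_format_spec_columns import (
    CsvFileFormatSpecColumns,
)

# Entries land in additional_properties and are emitted verbatim by
# to_dict(), matching the {'col1': 'INTEGER', ...} docstring example.
columns = CsvFileFormatSpecColumns()
columns["col1"] = "INTEGER"
columns["col2"] = "VARCHAR"
assert columns.to_dict() == {"col1": "INTEGER", "col2": "VARCHAR"}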

distribution/python/dqops/client/models/duckdb_parameters_spec.py (+33 -9)
@@ -3,6 +3,8 @@
 from attrs import define as _attrs_define
 from attrs import field as _attrs_field

+from ..models.duckdb_read_mode import DuckdbReadMode
+from ..models.duckdb_source_files_type import DuckdbSourceFilesType
 from ..types import UNSET, Unset

 if TYPE_CHECKING:
@@ -18,9 +20,8 @@
 class DuckdbParametersSpec:
     """
     Attributes:
-        in_memory (Union[Unset, bool]): To use the special value :memory: to create an in-memory database where no data
-            is persisted to disk (i.e., all data is lost when you exit the process). The value can be in the
-            ${ENVIRONMENT_VARIABLE_NAME} format to use dynamic substitution.
+        read_mode (Union[Unset, DuckdbReadMode]):
+        source_files_type (Union[Unset, DuckdbSourceFilesType]):
         database (Union[Unset, str]): DuckDB database name. The value can be in the ${ENVIRONMENT_VARIABLE_NAME} format
             to use dynamic substitution.
         options (Union[Unset, str]): DuckDB connection 'options' initialization parameter. For example setting this to
@@ -30,14 +31,22 @@ class DuckdbParametersSpec:
             to the JDBC connection string, a key/value dictionary.
     """

-    in_memory: Union[Unset, bool] = UNSET
+    read_mode: Union[Unset, DuckdbReadMode] = UNSET
+    source_files_type: Union[Unset, DuckdbSourceFilesType] = UNSET
     database: Union[Unset, str] = UNSET
     options: Union[Unset, str] = UNSET
     properties: Union[Unset, "DuckdbParametersSpecProperties"] = UNSET
     additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict)

     def to_dict(self) -> Dict[str, Any]:
-        in_memory = self.in_memory
+        read_mode: Union[Unset, str] = UNSET
+        if not isinstance(self.read_mode, Unset):
+            read_mode = self.read_mode.value
+
+        source_files_type: Union[Unset, str] = UNSET
+        if not isinstance(self.source_files_type, Unset):
+            source_files_type = self.source_files_type.value
+
         database = self.database
         options = self.options
         properties: Union[Unset, Dict[str, Any]] = UNSET
@@ -47,8 +56,10 @@ def to_dict(self) -> Dict[str, Any]:
         field_dict: Dict[str, Any] = {}
         field_dict.update(self.additional_properties)
         field_dict.update({})
-        if in_memory is not UNSET:
-            field_dict["in_memory"] = in_memory
+        if read_mode is not UNSET:
+            field_dict["read_mode"] = read_mode
+        if source_files_type is not UNSET:
+            field_dict["source_files_type"] = source_files_type
         if database is not UNSET:
             field_dict["database"] = database
         if options is not UNSET:
@@ -65,7 +76,19 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
         )

         d = src_dict.copy()
-        in_memory = d.pop("in_memory", UNSET)
+        _read_mode = d.pop("read_mode", UNSET)
+        read_mode: Union[Unset, DuckdbReadMode]
+        if isinstance(_read_mode, Unset):
+            read_mode = UNSET
+        else:
+            read_mode = DuckdbReadMode(_read_mode)
+
+        _source_files_type = d.pop("source_files_type", UNSET)
+        source_files_type: Union[Unset, DuckdbSourceFilesType]
+        if isinstance(_source_files_type, Unset):
+            source_files_type = UNSET
+        else:
+            source_files_type = DuckdbSourceFilesType(_source_files_type)

         database = d.pop("database", UNSET)

@@ -79,7 +102,8 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
             properties = DuckdbParametersSpecProperties.from_dict(_properties)

         duckdb_parameters_spec = cls(
-            in_memory=in_memory,
+            read_mode=read_mode,
+            source_files_type=source_files_type,
             database=database,
             options=options,
             properties=properties,
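Taken together with the two enum files below, the replacement fields serialize to plain strings and parse back into enum members:

from dqops.client.models.duckdb_parameters_spec import DuckdbParametersSpec
from dqops.client.models.duckdb_read_mode import DuckdbReadMode
from dqops.client.models.duckdb_source_files_type import DuckdbSourceFilesType

# to_dict() stores the enums' .value strings; from_dict() rebuilds members.
spec = DuckdbParametersSpec(
    read_mode=DuckdbReadMode.IN_MEMORY,
    source_files_type=DuckdbSourceFilesType.CSV,
)
assert spec.to_dict() == {"read_mode": "in_memory", "source_files_type": "csv"}
assert DuckdbParametersSpec.from_dict(spec.to_dict()).read_mode is DuckdbReadMode.IN_MEMORY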
distribution/python/dqops/client/models/duckdb_read_mode.py (new file, +9)

@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class DuckdbReadMode(str, Enum):
+    FILES = "files"
+    IN_MEMORY = "in_memory"
+
+    def __str__(self) -> str:
+        return str(self.value)
distribution/python/dqops/client/models/duckdb_source_files_type.py (new file, +10)

@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class DuckdbSourceFilesType(str, Enum):
+    CSV = "csv"
+    JSON = "json"
+    PARQUET = "parquet"
+
+    def __str__(self) -> str:
+        return str(self.value)
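Since both enums mix in str, members compare equal to their raw values, and the overridden __str__ yields the bare value rather than the default Enum repr:

from dqops.client.models.duckdb_read_mode import DuckdbReadMode
from dqops.client.models.duckdb_source_files_type import DuckdbSourceFilesType

# str-mixin enums behave like plain strings in comparisons and formatting.
assert DuckdbSourceFilesType.CSV == "csv"
assert str(DuckdbReadMode.FILES) == "files"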

distribution/python/dqops/client/models/file_format_spec.py (+1 -12)
@@ -21,14 +21,12 @@ class FileFormatSpec:
         csv_file_format (Union[Unset, CsvFileFormatSpec]):
         json_file_format (Union[Unset, JsonFileFormatSpec]):
         parquet_file_format (Union[Unset, ParquetFileFormatSpec]):
-        file_path_list (Union[Unset, List[str]]): The list of paths to files with data that are used as a source.
-        file_paths (Union[Unset, List[str]]):
+        file_paths (Union[Unset, List[str]]): The list of paths to files with data that are used as a source.
     """

     csv_file_format: Union[Unset, "CsvFileFormatSpec"] = UNSET
     json_file_format: Union[Unset, "JsonFileFormatSpec"] = UNSET
     parquet_file_format: Union[Unset, "ParquetFileFormatSpec"] = UNSET
-    file_path_list: Union[Unset, List[str]] = UNSET
     file_paths: Union[Unset, List[str]] = UNSET
     additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict)

@@ -45,10 +43,6 @@ def to_dict(self) -> Dict[str, Any]:
         if not isinstance(self.parquet_file_format, Unset):
             parquet_file_format = self.parquet_file_format.to_dict()

-        file_path_list: Union[Unset, List[str]] = UNSET
-        if not isinstance(self.file_path_list, Unset):
-            file_path_list = self.file_path_list
-
         file_paths: Union[Unset, List[str]] = UNSET
         if not isinstance(self.file_paths, Unset):
             file_paths = self.file_paths
@@ -62,8 +56,6 @@ def to_dict(self) -> Dict[str, Any]:
             field_dict["json_file_format"] = json_file_format
         if parquet_file_format is not UNSET:
             field_dict["parquet_file_format"] = parquet_file_format
-        if file_path_list is not UNSET:
-            field_dict["file_path_list"] = file_path_list
         if file_paths is not UNSET:
             field_dict["file_paths"] = file_paths

@@ -97,15 +89,12 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
         else:
             parquet_file_format = ParquetFileFormatSpec.from_dict(_parquet_file_format)

-        file_path_list = cast(List[str], d.pop("file_path_list", UNSET))
-
         file_paths = cast(List[str], d.pop("file_paths", UNSET))

         file_format_spec = cls(
             csv_file_format=csv_file_format,
             json_file_format=json_file_format,
             parquet_file_format=parquet_file_format,
-            file_path_list=file_path_list,
             file_paths=file_paths,
         )
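With file_path_list removed, from_dict only maps the file_paths key onto a model field. A sketch, assuming the generated from_dict stashes unrecognized keys in additional_properties, as openapi-python-client models with additional properties do:

from dqops.client.models.file_format_spec import FileFormatSpec
from dqops.client.types import UNSET

# The surviving key round-trips normally.
spec = FileFormatSpec.from_dict({"file_paths": ["/data/part1.csv"]})
assert spec.file_paths == ["/data/part1.csv"]

# A legacy payload using the dropped key no longer populates file_paths;
# the key falls through as an unrecognized additional property.
legacy = FileFormatSpec.from_dict({"file_path_list": ["/data/old.csv"]})
assert legacy.file_paths is UNSET
assert legacy.additional_properties == {"file_path_list": ["/data/old.csv"]}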
