Skip to content

Commit 898e7df

Browse files
committed
Analyze nested columns in JSON files.
1 parent 35d6320 commit 898e7df

File tree

12 files changed

+881
-8
lines changed

12 files changed

+881
-8
lines changed

dqops/src/main/frontend/src/pages/TableColumnsView/TableColumns.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ const TableColumns = ({
181181
columnHash: Number(hashData?.[i]),
182182
isColumnSelected: false,
183183
dimentions: rewriteDimensions(status)[columnNameData?.[i] ?? ''],
184-
labels: getLabelsOverview(columns[i].labels ?? [])
184+
labels: getLabelsOverview(columns[i]?.labels ?? [])
185185
};
186186

187187
dataArray.push(newData);

dqops/src/main/java/com/dqops/connectors/duckdb/DuckdbSourceConnection.java

+38-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import com.dqops.connectors.SourceTableModel;
2020
import com.dqops.connectors.duckdb.fileslisting.AwsTablesLister;
2121
import com.dqops.connectors.duckdb.fileslisting.LocalSystemTablesLister;
22+
import com.dqops.connectors.duckdb.schema.DuckDBDataTypeParser;
23+
import com.dqops.connectors.duckdb.schema.DuckDBField;
2224
import com.dqops.connectors.jdbc.AbstractJdbcSourceConnection;
2325
import com.dqops.connectors.jdbc.JdbcConnectionPool;
2426
import com.dqops.connectors.jdbc.JdbcQueryFailedException;
@@ -66,6 +68,7 @@ public class DuckdbSourceConnection extends AbstractJdbcSourceConnection {
6668
private static final Object registerExtensionsLock = new Object();
6769
private static boolean extensionsRegistered = false;
6870
private final DqoDuckdbConfiguration dqoDuckdbConfiguration;
71+
private final DuckDBDataTypeParser dataTypeParser;
6972
private static final Object settingsExecutionLock = new Object();
7073
private static final boolean settingsConfigured = false;
7174
private static final String temporaryDirectoryPrefix = "dqops_duckdb_temp_";
@@ -77,16 +80,19 @@ public class DuckdbSourceConnection extends AbstractJdbcSourceConnection {
7780
* @param secretValueProvider Secret value provider for the environment variable expansion.
7881
* @param homeLocationFindService Home location find service.
7982
* @param dqoDuckdbConfiguration Configuration settings for duckdb.
83+
* @param dataTypeParser Data type parser that parses the schema of structures.
8084
*/
8185
@Autowired
8286
public DuckdbSourceConnection(JdbcConnectionPool jdbcConnectionPool,
8387
SecretValueProvider secretValueProvider,
8488
DuckdbConnectionProvider duckdbConnectionProvider,
8589
HomeLocationFindService homeLocationFindService,
86-
DqoDuckdbConfiguration dqoDuckdbConfiguration) {
90+
DqoDuckdbConfiguration dqoDuckdbConfiguration,
91+
DuckDBDataTypeParser dataTypeParser) {
8792
super(jdbcConnectionPool, secretValueProvider, duckdbConnectionProvider);
8893
this.homeLocationFindService = homeLocationFindService;
8994
this.dqoDuckdbConfiguration = dqoDuckdbConfiguration;
95+
this.dataTypeParser = dataTypeParser;
9096
}
9197

9298
/**
@@ -412,7 +418,16 @@ public List<TableSpec> retrieveTableMetadata(String schemaName,
412418
String dataType = colRow.getString("column_type");
413419
boolean isNullable = Objects.equals(colRow.getString("null"), "YES");
414420
ColumnSpec columnSpec = prepareNewColumnSpec(dataType, isNullable);
421+
if (dataType != null && (dataType.startsWith("STRUCT") || dataType.startsWith("UNION") || dataType.startsWith("MAP"))) {
422+
columnSpec.setTypeSnapshot(new ColumnTypeSnapshotSpec(dataType, isNullable));
423+
}
415424
tableSpec.getColumns().put(columnName, columnSpec);
425+
426+
DuckDBField parsedField = this.dataTypeParser.parseFieldType(dataType, columnName);
427+
if (parsedField.isStruct()) {
428+
String parentColumnPrefix = parsedField.isArray() ? columnName + "[0]" : columnName;
429+
addNestedFieldsFromStructs(parentColumnPrefix, parsedField, tableSpec.getColumns());
430+
}
416431
}
417432
} catch (Exception e){
418433
if (!e.getMessage().contains("SQL query failed: java.sql.SQLException: IO Error: No files found that match the pattern")){
@@ -424,6 +439,28 @@ public List<TableSpec> retrieveTableMetadata(String schemaName,
424439
return tableSpecs;
425440
}
426441

442+
/**
443+
* Traverses a structure of nested fields inside STRUCT data types and adds all nested fields.
444+
* @param parentColumnName Parent column name used as a prefix to access nested fields.
445+
* @param structField DuckDB field schema that was parsed - must be a STRUCT field.
446+
* @param targetColumnsMap Target column map to add generated columns.
447+
*/
448+
private void addNestedFieldsFromStructs(String parentColumnName, DuckDBField structField, ColumnSpecMap targetColumnsMap) {
449+
if (structField.isStruct() && structField.getNestedFields() != null) {
450+
for (DuckDBField childField : structField.getNestedFields()) {
451+
ColumnSpec columnSpec = prepareNewColumnSpec(childField.getTypeName(), childField.isNullable());
452+
columnSpec.getTypeSnapshot().setNested(true);
453+
String nestedFieldName = parentColumnName + "." + childField.getFieldName();
454+
targetColumnsMap.put(nestedFieldName, columnSpec);
455+
456+
if (childField.isStruct()) {
457+
String childColumnPrefix = childField.isArray() ? nestedFieldName + "[0]" : nestedFieldName;
458+
addNestedFieldsFromStructs(childColumnPrefix, childField, targetColumnsMap);
459+
}
460+
}
461+
}
462+
}
463+
427464
/**
428465
* Creates a new column spec.
429466
* @param dataType A data type of the column.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Copyright © 2021 DQOps (support@dqops.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.dqops.connectors.duckdb.schema;
18+
19+
/**
20+
* Parser that is able to parse data types from DuckDB.
21+
*/
22+
public interface DuckDBDataTypeParser {
23+
/**
24+
* Parses the text in <code>dataType</code> into an object that describes the field. Also parses structures and arrays.
25+
* @param dataType Data type to parse.
26+
* @param fieldName Field name to store in the result object (because the root data type has no field name, only nested fields contain a name).
27+
* @return Parsed field description, including the fields of nested structures.
28+
*/
29+
DuckDBField parseFieldType(String dataType, String fieldName);
30+
}

0 commit comments

Comments
 (0)