diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java index 00631e778b4e..d0b02fcc0e4a 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java @@ -88,6 +88,7 @@ public class SegmentProcessorFrameworkTest { private Schema _schema; private Schema _schemaMV; + private Schema _schemaWithComplexType; private final List _rawData = Arrays.asList(new Object[]{"abc", 1000, 1597719600000L}, new Object[]{null, 2000, 1597773600000L}, @@ -119,18 +120,27 @@ public void setup() _tableConfigWithFixedSegmentName.getIndexingConfig().setSegmentNameGeneratorType("fixed"); _schema = - new Schema.SchemaBuilder().setSchemaName("mySchema").addSingleValueDimension("campaign", DataType.STRING, "") + new Schema.SchemaBuilder().setSchemaName("mySchema") + .addSingleValueDimension("campaign", DataType.STRING, "") .addSingleValueDimension("campaign.inner1", DataType.STRING) .addSingleValueDimension("campaign.inner1.inner2", DataType.STRING) // NOTE: Intentionally put 1000 as default value to test skipping null values during rollup .addMetric("clicks", DataType.INT, 1000) .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); _schemaMV = - new Schema.SchemaBuilder().setSchemaName("mySchema").addMultiValueDimension("campaign", DataType.STRING, "") + new Schema.SchemaBuilder().setSchemaName("mySchema") + .addMultiValueDimension("campaign", DataType.STRING, "") + // NOTE: Intentionally put 1000 as default value to test skipping null values during rollup + .addMetric("clicks", DataType.INT, 1000) + .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); + _schemaWithComplexType = + new Schema.SchemaBuilder().setSchemaName("mySchema") + .addSingleValueDimension("campaign", DataType.JSON) + .addSingleValueDimension("campaign.inner1", DataType.STRING) + .addSingleValueDimension("campaign.inner1.inner2", DataType.STRING) // NOTE: Intentionally put 1000 as default value to test skipping null values during rollup .addMetric("clicks", DataType.INT, 1000) .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); - // create segments in many folders _singleSegment = createInputSegments(new File(TEMP_DIR, "single_segment"), _rawData, 1, _schema); _multipleSegments = createInputSegments(new File(TEMP_DIR, "multiple_segments"), _rawData, 3, _schema); @@ -258,16 +268,19 @@ public void testSegmentGenerationWithComplexType() throws Exception { _tableConfig.setIngestionConfig(ingestionConfig); // Default configs SegmentProcessorConfig config = - new SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schema).build(); + new SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schemaWithComplexType).build(); SegmentProcessorFramework framework = new SegmentProcessorFramework(_recordReaderWithComplexType, config, workingDir); List outputSegments = framework.process(); ImmutableSegment segment = ImmutableSegmentLoader.load(outputSegments.get(0), ReadMode.mmap); SegmentMetadata segmentMetadata = segment.getSegmentMetadata(); // Pick the column created from complex type - ColumnMetadata campaignMetadata = segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2"); + ColumnMetadata campaignInner2Metadata = segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2"); // Verify we see a specific value parsed from the complexType - Assert.assertEquals(campaignMetadata.getMinValue().compareTo("inner2v"), 0); + Assert.assertEquals(campaignInner2Metadata.getMinValue().compareTo("inner2v"), 0); + ColumnMetadata campaignMetadata = segmentMetadata.getColumnMetadataFor("campaign"); + Assert.assertEquals( + campaignMetadata.getMinValue().compareTo("{\"inner1\":{\"inner2\":\"inner2v\"},\"inner\":\"innerv\"}"), 0); } @Test diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java index a67bc54d4624..48db4c6ad331 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java @@ -257,7 +257,7 @@ protected void flattenMap(GenericRow record, List columns) { for (String column : columns) { Object value = record.getValue(column); if (value instanceof Map) { - Map map = (Map) record.removeValue(column); + Map map = (Map) value; List mapColumns = new ArrayList<>(); for (Map.Entry entry : new ArrayList<>(map.entrySet())) { String flattenName = concat(column, entry.getKey()); diff --git a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json index d434773aab59..357151aadf0c 100644 --- a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json +++ b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json @@ -16,6 +16,10 @@ { "columnName": "created_at_timestamp", "transformFunction": "fromDateTime(created_at, 'yyyy-MM-dd''T''HH:mm:ss''Z''')" + }, + { + "columnName": "payload_str", + "transformFunction": "jsonFormat(payload)" } ], "complexTypeConfig": { diff --git a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json index 1c79af79cf22..dbacd3d21394 100644 --- a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json +++ b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json @@ -8,6 +8,14 @@ "name": "type", "dataType": "STRING" }, + { + "name": "payload", + "dataType": "JSON" + }, + { + "name": "payload_str", + "dataType": "STRING" + }, { "name": "payload.push_id", "dataType": "LONG"