Skip to content

Commit

Permalink
Fix the issue that map flatten shouldn't remove the map field from th…
Browse files Browse the repository at this point in the history
…e record (apache#13243)
  • Loading branch information
xiangfu0 authored May 28, 2024
1 parent 1b16615 commit be4f740
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public class SegmentProcessorFrameworkTest {

private Schema _schema;
private Schema _schemaMV;
private Schema _schemaWithComplexType;

private final List<Object[]> _rawData =
Arrays.asList(new Object[]{"abc", 1000, 1597719600000L}, new Object[]{null, 2000, 1597773600000L},
Expand Down Expand Up @@ -119,18 +120,27 @@ public void setup()
_tableConfigWithFixedSegmentName.getIndexingConfig().setSegmentNameGeneratorType("fixed");

_schema =
new Schema.SchemaBuilder().setSchemaName("mySchema").addSingleValueDimension("campaign", DataType.STRING, "")
new Schema.SchemaBuilder().setSchemaName("mySchema")
.addSingleValueDimension("campaign", DataType.STRING, "")
.addSingleValueDimension("campaign.inner1", DataType.STRING)
.addSingleValueDimension("campaign.inner1.inner2", DataType.STRING)
// NOTE: Intentionally put 1000 as default value to test skipping null values during rollup
.addMetric("clicks", DataType.INT, 1000)
.addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build();
_schemaMV =
new Schema.SchemaBuilder().setSchemaName("mySchema").addMultiValueDimension("campaign", DataType.STRING, "")
new Schema.SchemaBuilder().setSchemaName("mySchema")
.addMultiValueDimension("campaign", DataType.STRING, "")
// NOTE: Intentionally put 1000 as default value to test skipping null values during rollup
.addMetric("clicks", DataType.INT, 1000)
.addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build();
_schemaWithComplexType =
new Schema.SchemaBuilder().setSchemaName("mySchema")
.addSingleValueDimension("campaign", DataType.JSON)
.addSingleValueDimension("campaign.inner1", DataType.STRING)
.addSingleValueDimension("campaign.inner1.inner2", DataType.STRING)
// NOTE: Intentionally put 1000 as default value to test skipping null values during rollup
.addMetric("clicks", DataType.INT, 1000)
.addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build();

// create segments in many folders
_singleSegment = createInputSegments(new File(TEMP_DIR, "single_segment"), _rawData, 1, _schema);
_multipleSegments = createInputSegments(new File(TEMP_DIR, "multiple_segments"), _rawData, 3, _schema);
Expand Down Expand Up @@ -258,16 +268,19 @@ public void testSegmentGenerationWithComplexType() throws Exception {
_tableConfig.setIngestionConfig(ingestionConfig);
// Default configs
SegmentProcessorConfig config =
new SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schema).build();
new SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schemaWithComplexType).build();
SegmentProcessorFramework framework =
new SegmentProcessorFramework(_recordReaderWithComplexType, config, workingDir);
List<File> outputSegments = framework.process();
ImmutableSegment segment = ImmutableSegmentLoader.load(outputSegments.get(0), ReadMode.mmap);
SegmentMetadata segmentMetadata = segment.getSegmentMetadata();
// Pick the column created from complex type
ColumnMetadata campaignMetadata = segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2");
ColumnMetadata campaignInner2Metadata = segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2");
// Verify we see a specific value parsed from the complexType
Assert.assertEquals(campaignMetadata.getMinValue().compareTo("inner2v"), 0);
Assert.assertEquals(campaignInner2Metadata.getMinValue().compareTo("inner2v"), 0);
ColumnMetadata campaignMetadata = segmentMetadata.getColumnMetadataFor("campaign");
Assert.assertEquals(
campaignMetadata.getMinValue().compareTo("{\"inner1\":{\"inner2\":\"inner2v\"},\"inner\":\"innerv\"}"), 0);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ protected void flattenMap(GenericRow record, List<String> columns) {
for (String column : columns) {
Object value = record.getValue(column);
if (value instanceof Map) {
Map<String, Object> map = (Map) record.removeValue(column);
Map<String, Object> map = (Map) value;
List<String> mapColumns = new ArrayList<>();
for (Map.Entry<String, Object> entry : new ArrayList<>(map.entrySet())) {
String flattenName = concat(column, entry.getKey());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
{
"columnName": "created_at_timestamp",
"transformFunction": "fromDateTime(created_at, 'yyyy-MM-dd''T''HH:mm:ss''Z''')"
},
{
"columnName": "payload_str",
"transformFunction": "jsonFormat(payload)"
}
],
"complexTypeConfig": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
"name": "type",
"dataType": "STRING"
},
{
"name": "payload",
"dataType": "JSON"
},
{
"name": "payload_str",
"dataType": "STRING"
},
{
"name": "payload.push_id",
"dataType": "LONG"
Expand Down

0 comments on commit be4f740

Please sign in to comment.