diff --git a/arrow/extensions/extensions.go b/arrow/extensions/extensions.go index 4e02823a..22fb01fd 100644 --- a/arrow/extensions/extensions.go +++ b/arrow/extensions/extensions.go @@ -21,8 +21,8 @@ import ( ) var canonicalExtensionTypes = []arrow.ExtensionType{ - &Bool8Type{}, - &UUIDType{}, + NewBool8Type(), + NewUUIDType(), &OpaqueType{}, &JSONType{}, } diff --git a/parquet/pqarrow/encode_arrow_test.go b/parquet/pqarrow/encode_arrow_test.go index 1ff1710b..b75a5c01 100644 --- a/parquet/pqarrow/encode_arrow_test.go +++ b/parquet/pqarrow/encode_arrow_test.go @@ -2057,6 +2057,7 @@ func (ps *ParquetIOTestSuite) TestArrowExtensionTypeRoundTrip() { defer tbl.Release() ps.roundTripTable(mem, tbl, true) + ps.roundTripTable(mem, tbl, false) } func (ps *ParquetIOTestSuite) TestArrowUnknownExtensionTypeRoundTrip() { diff --git a/parquet/pqarrow/schema.go b/parquet/pqarrow/schema.go index 77b8f750..6d30359c 100644 --- a/parquet/pqarrow/schema.go +++ b/parquet/pqarrow/schema.go @@ -514,8 +514,14 @@ func arrowFromFLBA(logical schema.LogicalType, length int) (arrow.DataType, erro switch logtype := logical.(type) { case schema.DecimalLogicalType: return arrowDecimal(logtype), nil - case schema.NoLogicalType, schema.IntervalLogicalType, schema.UUIDLogicalType: + case schema.NoLogicalType, schema.IntervalLogicalType: return &arrow.FixedSizeBinaryType{ByteWidth: int(length)}, nil + case schema.UUIDLogicalType: + uuidType := arrow.GetExtensionType("arrow.uuid") + if uuidType == nil { + return &arrow.FixedSizeBinaryType{ByteWidth: int(length)}, nil + } + return uuidType, nil case schema.Float16LogicalType: return &arrow.Float16Type{}, nil default: @@ -984,13 +990,14 @@ func applyOriginalStorageMetadata(origin arrow.Field, inferred *SchemaField) (mo return } - if !arrow.TypeEqual(extType.StorageType(), inferred.Field.Type) { - return modified, fmt.Errorf("%w: mismatch storage type '%s' for extension type '%s'", - arrow.ErrInvalid, inferred.Field.Type, extType) - } + if modified && !arrow.TypeEqual(extType, inferred.Field.Type) { + if !arrow.TypeEqual(extType.StorageType(), inferred.Field.Type) { + return modified, fmt.Errorf("%w: mismatch storage type '%s' for extension type '%s'", + arrow.ErrInvalid, inferred.Field.Type, extType) + } - inferred.Field.Type = extType - modified = true + inferred.Field.Type = extType + } case arrow.SPARSE_UNION, arrow.DENSE_UNION: err = xerrors.New("unimplemented type") case arrow.STRUCT: