Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-39489: [C++][Parquet] ConvertedType TIME and TIMESTAMP should imply adjustedToUtc #39491

Merged
merged 4 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/src/parquet/arrow/arrow_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
ParquetType::INT64,
ConvertedType::TIMESTAMP_MILLIS));
arrow_fields.push_back(
::arrow::field("timestamp", ::arrow::timestamp(TimeUnit::MILLI), false));
::arrow::field("timestamp", ::arrow::timestamp(TimeUnit::MILLI, "UTC"), false));

parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED,
ParquetType::INT64,
ConvertedType::TIMESTAMP_MICROS));
arrow_fields.push_back(
::arrow::field("timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO), false));
::arrow::field("timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO, "UTC"), false));

parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED,
ParquetType::INT32, ConvertedType::DATE));
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ static std::shared_ptr<const LogicalType> TimestampLogicalTypeFromArrowTimestamp
/*is_from_converted_type=*/false,
/*force_set_converted_type=*/true);
case ::arrow::TimeUnit::NANO:
return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS);
return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS,
/*is_from_converted_type=*/false,
/*force_set_converted_type=*/false);
case ::arrow::TimeUnit::SECOND:
// No equivalent parquet logical type.
break;
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/parquet/arrow/schema_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ Result<std::shared_ptr<ArrowType>> MakeArrowTime64(const LogicalType& logical_ty

Result<std::shared_ptr<ArrowType>> MakeArrowTimestamp(const LogicalType& logical_type) {
const auto& timestamp = checked_cast<const TimestampLogicalType&>(logical_type);
const bool utc_normalized =
timestamp.is_from_converted_type() ? false : timestamp.is_adjusted_to_utc();
const bool utc_normalized = timestamp.is_adjusted_to_utc();
static const char* utc_timezone = "UTC";
switch (timestamp.time_unit()) {
case LogicalType::TimeUnit::MILLIS:
Expand Down
18 changes: 14 additions & 4 deletions cpp/src/parquet/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,15 +345,25 @@ std::shared_ptr<const LogicalType> LogicalType::FromConvertedType(
case ConvertedType::DATE:
return DateLogicalType::Make();
case ConvertedType::TIME_MILLIS:
return TimeLogicalType::Make(true, LogicalType::TimeUnit::MILLIS);
// ConvertedType::TIME_{*} are deprecated in favor of LogicalType::Time, the
// compatibility for ConvertedType::TIME_{*} are listed in
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#deprecated-time-convertedtype
return TimeLogicalType::Make(/*is_adjusted_to_utc=*/true,
LogicalType::TimeUnit::MILLIS);
case ConvertedType::TIME_MICROS:
return TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS);
return TimeLogicalType::Make(/*is_adjusted_to_utc=*/true,
LogicalType::TimeUnit::MICROS);
case ConvertedType::TIMESTAMP_MILLIS:
return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS,
// ConvertedType::TIMESTAMP_{*} are deprecated in favor of LogicalType::Timestamp,
// the compatibility for ConvertedType::TIMESTAMP_{*} are listed in
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#deprecated-timestamp-convertedtype
return TimestampLogicalType::Make(/*is_adjusted_to_utc=*/true,
LogicalType::TimeUnit::MILLIS,
/*is_from_converted_type=*/true,
/*force_set_converted_type=*/false);
case ConvertedType::TIMESTAMP_MICROS:
return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS,
return TimestampLogicalType::Make(/*is_adjusted_to_utc=*/true,
LogicalType::TimeUnit::MICROS,
/*is_from_converted_type=*/true,
/*force_set_converted_type=*/false);
case ConvertedType::INTERVAL:
Expand Down
Loading