Skip to content

Commit

Permalink
Revert "Pass metadata and value into VariantLogicalType"
Browse files Browse the repository at this point in the history
  • Loading branch information
neilechao committed Feb 20, 2025
1 parent 89e90c8 commit 3e9c4f7
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 89 deletions.
3 changes: 1 addition & 2 deletions cpp/src/parquet/arrow/arrow_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,7 @@ TEST_F(TestConvertParquetSchema, ParquetAnnotatedFields) {
::arrow::int64()},
{"json", LogicalType::JSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::utf8()},
{"bson", LogicalType::BSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
{"variant", LogicalType::Variant("metadata", "value"), ParquetType::BYTE_ARRAY, -1,
::arrow::binary()},
{"variant", LogicalType::Variant(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()},
{"interval", LogicalType::Interval(), ParquetType::FIXED_LEN_BYTE_ARRAY, 12,
::arrow::fixed_size_binary(12)},
{"uuid", LogicalType::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16,
Expand Down
23 changes: 9 additions & 14 deletions cpp/src/parquet/schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1166,7 +1166,7 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
std::vector<ConfirmNewTypeIncompatibilityArguments> cases = {
{LogicalType::UUID(), check_is_UUID},
{LogicalType::Float16(), check_is_float16},
{LogicalType::Variant("metadata", "value"), check_is_variant},
{LogicalType::Variant(), check_is_variant},
{LogicalType::Null(), check_is_null},
{LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), check_is_time},
{LogicalType::Time(false, LogicalType::TimeUnit::MICROS), check_is_time},
Expand Down Expand Up @@ -1251,7 +1251,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeProperties) {
{BSONLogicalType::Make(), false, true, true},
{UUIDLogicalType::Make(), false, true, true},
{Float16LogicalType::Make(), false, true, true},
{VariantLogicalType::Make("metadata", "value"), false, true, true},
{VariantLogicalType::Make(), false, true, true},
{NoLogicalType::Make(), false, false, true},
};

Expand Down Expand Up @@ -1549,9 +1549,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
{LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"},
{LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"},
{LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"},
{LogicalType::Variant("metadata", "value"),
"Variant(metadata=metadata,value=value)",
R"({"Type": "Variant", "metadata": metadata, "value": value})"},
{LogicalType::Variant(), "Variant", R"({"Type": "Variant"})"},
{LogicalType::None(), "None", R"({"Type": "None"})"},
};

Expand Down Expand Up @@ -1602,7 +1600,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) {
{LogicalType::BSON(), SortOrder::UNSIGNED},
{LogicalType::UUID(), SortOrder::UNSIGNED},
{LogicalType::Float16(), SortOrder::SIGNED},
{LogicalType::Variant("metadata", "value"), SortOrder::UNKNOWN},
{LogicalType::Variant(), SortOrder::UNKNOWN},
{LogicalType::None(), SortOrder::UNKNOWN}};

for (const ExpectedSortOrder& c : cases) {
Expand Down Expand Up @@ -1746,11 +1744,10 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {

// Incompatible primitive type ...
ASSERT_ANY_THROW(PrimitiveNode::Make("variant", Repetition::REQUIRED,
VariantLogicalType::Make("metadata", "value"),
Type::DOUBLE));
VariantLogicalType::Make(), Type::DOUBLE));
// Incompatible primitive type ...
ASSERT_ANY_THROW(PrimitiveNode::Make("variant", Repetition::REQUIRED,
VariantLogicalType::Make("metadata", "value"),
VariantLogicalType::Make(),
Type::FIXED_LEN_BYTE_ARRAY, 2));

// Non-positive length argument for fixed length binary ...
Expand Down Expand Up @@ -1946,9 +1943,8 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) {
{"float16", LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2, false,
ConvertedType::NA, true,
[this]() { return element_->logicalType.__isset.FLOAT16; }},
{"variant", LogicalType::Variant("metadata", "value"), Type::BYTE_ARRAY, -1, false,
ConvertedType::NA, true,
[this]() { return element_->logicalType.__isset.VARIANT; }},
{"variant", LogicalType::Variant(), Type::BYTE_ARRAY, -1, false, ConvertedType::NA,
true, [this]() { return element_->logicalType.__isset.VARIANT; }},
{"none", LogicalType::None(), Type::INT64, -1, false, ConvertedType::NA, false,
check_nothing}};

Expand Down Expand Up @@ -2286,8 +2282,7 @@ TEST(TestLogicalTypeSerialization, Roundtrips) {
{LogicalType::BSON(), Type::BYTE_ARRAY, -1},
{LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16},
{LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2},
// TODO(neilechao) discussing how to model Variant
// {LogicalType::Variant("metadata", "value"), Type::BYTE_ARRAY, -1},
{LogicalType::Variant(), Type::BYTE_ARRAY, -1},
{LogicalType::None(), Type::BOOLEAN, -1}};

for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) {
Expand Down
76 changes: 8 additions & 68 deletions cpp/src/parquet/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -480,9 +480,7 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
} else if (type.__isset.FLOAT16) {
return Float16LogicalType::Make();
} else if (type.__isset.VARIANT) {
std::string metadata = type.VARIANT.metadata;
std::string value = type.VARIANT.value;
return VariantLogicalType::Make(metadata, value);
return VariantLogicalType::Make();
} else {
throw ParquetException("Metadata contains Thrift LogicalType that is not recognized");
}
Expand Down Expand Up @@ -540,9 +538,8 @@ std::shared_ptr<const LogicalType> LogicalType::Float16() {
return Float16LogicalType::Make();
}

std::shared_ptr<const LogicalType> LogicalType::Variant(std::string metadata,
std::string value) {
return VariantLogicalType::Make(std::move(metadata), std::move(value));
std::shared_ptr<const LogicalType> LogicalType::Variant() {
return VariantLogicalType::Make();
}

std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
Expand Down Expand Up @@ -1637,73 +1634,16 @@ class LogicalType::Impl::Variant final : public LogicalType::Impl::Incompatible,
public:
friend class VariantLogicalType;

std::string ToString() const override;
std::string ToJSON() const override;
format::LogicalType ToThrift() const override;
bool Equals(const LogicalType& other) const override;

const std::string& metadata() const { return metadata_; }
const std::string& value() const { return value_; }
OVERRIDE_TOSTRING(Variant)
OVERRIDE_TOTHRIFT(VariantType, VARIANT)

private:
Variant(std::string metadata, std::string value)
Variant()
: LogicalType::Impl(LogicalType::Type::VARIANT, SortOrder::UNKNOWN),
LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY),
metadata_(std::move(metadata)),
value_(std::move(value)) {}

std::string metadata_;
std::string value_;
LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY) {}
};

std::string LogicalType::Impl::Variant::ToString() const {
std::stringstream type;
type << "Variant(metadata=" << metadata() << ",value=" << value() << ")";
return type.str();
}

std::string LogicalType::Impl::Variant::ToJSON() const {
std::stringstream json;
// TODO(neilechao) escape special characters in metadata and value
json << R"({"Type": "Variant", "metadata": )" << metadata_ << R"(, "value": )" << value_
<< "}";
return json.str();
}

format::LogicalType LogicalType::Impl::Variant::ToThrift() const {
format::LogicalType type;
format::VariantType variant_type;

variant_type.__set_metadata(metadata_);
variant_type.__set_value(value_);

type.__set_VARIANT(variant_type);
return type;
}

bool LogicalType::Impl::Variant::Equals(const LogicalType& other) const {
if (other.is_variant()) {
const auto& other_variant = checked_cast<const VariantLogicalType&>(other);
return metadata_ == other_variant.metadata() && value_ == other_variant.value();
} else {
return false;
}
}

std::shared_ptr<const LogicalType> VariantLogicalType::Make(std::string metadata,
std::string value) {
auto* logical_type = new VariantLogicalType();
logical_type->impl_.reset(new LogicalType::Impl::Variant(metadata, value));
return std::shared_ptr<const LogicalType>(logical_type);
}

const std::string& VariantLogicalType::metadata() const {
return (dynamic_cast<const LogicalType::Impl::Variant&>(*impl_)).metadata();
}

const std::string& VariantLogicalType::value() const {
return (dynamic_cast<const LogicalType::Impl::Variant&>(*impl_)).value();
}
GENERATE_MAKE(Variant)

class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
public LogicalType::Impl::UniversalApplicable {
Expand Down
7 changes: 2 additions & 5 deletions cpp/src/parquet/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,7 @@ class PARQUET_EXPORT LogicalType {
static std::shared_ptr<const LogicalType> BSON();
static std::shared_ptr<const LogicalType> UUID();
static std::shared_ptr<const LogicalType> Float16();
static std::shared_ptr<const LogicalType> Variant(std::string metadata,
std::string value);
static std::shared_ptr<const LogicalType> Variant();

/// \brief Create a placeholder for when no logical type is specified
static std::shared_ptr<const LogicalType> None();
Expand Down Expand Up @@ -453,9 +452,7 @@ class PARQUET_EXPORT Float16LogicalType : public LogicalType {
/// \brief Allowed for physical type BYTE_ARRAY.
class PARQUET_EXPORT VariantLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make(std::string metadata, std::string data);
const std::string& metadata() const;
const std::string& value() const;
static std::shared_ptr<const LogicalType> Make();

private:
VariantLogicalType() = default;
Expand Down

0 comments on commit 3e9c4f7

Please sign in to comment.