Skip to content

Commit

Permalink
apacheGH-41741: [C++] Check that extension metadata key is present be…
Browse files Browse the repository at this point in the history
…fore attempting to delete it (apache#41763)

### Rationale for this change

Neither Schema.fbs nor the Arrow C Data interface nor the columnar specification indicates that the ARROW:extension:metadata key must be present; however, the `ImportType()` implementation assumes that both `ARROW:extension:name` and `ARROW:extension:metadata` are both present and throws an exception if `ARROW:extension:metadata` is missing. This causes pyarrow to crash (see issue for reproducer).

### What changes are included in this PR?

This PR checks that the extension metadata is present before attempting to delete it.

### Are these changes tested?

Yes (test added).

### Are there any user-facing changes?

No.
* GitHub Issue: apache#41741

Authored-by: Dewey Dunnington <dewey@voltrondata.com>
Signed-off-by: Dewey Dunnington <dewey@voltrondata.com>
  • Loading branch information
paleolimbot authored May 24, 2024
1 parent 19044ee commit b275483
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 2 deletions.
10 changes: 8 additions & 2 deletions cpp/src/arrow/c/bridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1059,8 +1059,14 @@ struct SchemaImporter {
ARROW_ASSIGN_OR_RAISE(
type_, registered_ext_type->Deserialize(std::move(type_),
metadata_.extension_serialized));
RETURN_NOT_OK(metadata_.metadata->DeleteMany(
{metadata_.extension_name_index, metadata_.extension_serialized_index}));
// If metadata is present, delete both metadata keys (otherwise, just remove
// the extension name key)
if (metadata_.extension_serialized_index >= 0) {
RETURN_NOT_OK(metadata_.metadata->DeleteMany(
{metadata_.extension_name_index, metadata_.extension_serialized_index}));
} else {
RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index));
}
}
}

Expand Down
17 changes: 17 additions & 0 deletions cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4102,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) {
TestWithArrayFactory(NestedFactory(ExampleDictExtension));
}

TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) {
auto ext_type = std::make_shared<MetadataOptionalExtensionType>();
ExtensionTypeGuard guard(ext_type);

auto ext_metadata =
KeyValueMetadata::Make({"ARROW:extension:name"}, {ext_type->extension_name()});
auto ext_field = field("", ext_type->storage_type(), true, std::move(ext_metadata));

struct ArrowSchema c_schema {};
SchemaExportGuard schema_guard(&c_schema);
ASSERT_OK(ExportField(*ext_field, &c_schema));

ASSERT_OK_AND_ASSIGN(auto ext_type_roundtrip, ImportType(&c_schema));
ASSERT_EQ(ext_type_roundtrip->id(), Type::EXTENSION);
AssertTypeEqual(ext_type_roundtrip, ext_type);
}

TEST_F(TestArrayRoundtrip, UnregisteredExtension) {
auto StorageExtractor = [](ArrayFactory factory) {
return [factory]() -> Result<std::shared_ptr<Array>> {
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/testing/extension_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType {
std::string Serialize() const override { return "dict-extension-serialized"; }
};

// A minimal extension type that does not error when passed blank extension information
class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public ExtensionType {
public:
MetadataOptionalExtensionType() : ExtensionType(null()) {}
std::string extension_name() const override { return "metadata.optional"; }
std::string Serialize() const override { return ""; }
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
return nullptr;
}
bool ExtensionEquals(const ExtensionType& other) const override {
return other.extension_name() == extension_name();
}
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const override {
return std::make_shared<MetadataOptionalExtensionType>();
}
};

class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
Expand Down

0 comments on commit b275483

Please sign in to comment.