diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index 3683285a89b..f3251ddda83 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -17,6 +17,7 @@ #include "arrow_utilities.hpp" #include +#include #include #include #include @@ -266,9 +267,12 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colu ArrowArray* out) { nanoarrow::UniqueArray tmp; + + cudf::dictionary_column_view const dview{column.view()}; + NANOARROW_RETURN_NOT_OK(initialize_array( tmp.get(), - id_to_arrow_type(column.child(cudf::dictionary_column_view::indices_column_index).type().id()), + dview.is_empty() ? NANOARROW_TYPE_INT32 : id_to_arrow_type(dview.indices().type().id()), column)); NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateDictionary(tmp.get())); @@ -276,11 +280,15 @@ int dispatch_to_arrow_device::operator()(cudf::column&& colu NANOARROW_RETURN_NOT_OK(set_null_mask(contents, tmp.get())); auto indices_contents = - contents.children[cudf::dictionary_column_view::indices_column_index]->release(); + dview.is_empty() + ? cudf::make_empty_column(cudf::type_id::INT32)->release() + : contents.children[cudf::dictionary_column_view::indices_column_index]->release(); NANOARROW_RETURN_NOT_OK( set_buffer(std::move(indices_contents.data), fixed_width_data_buffer_idx, tmp.get())); - auto& keys = contents.children[cudf::dictionary_column_view::keys_column_index]; + auto keys = dview.is_empty() + ? cudf::make_empty_column(cudf::type_id::INT64) + : std::move(contents.children[cudf::dictionary_column_view::keys_column_index]); NANOARROW_RETURN_NOT_OK(cudf::type_dispatcher( keys->type(), dispatch_to_arrow_device{}, std::move(*keys), stream, mr, tmp->dictionary)); diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp index 7ffee53dac8..aabba447ee2 100644 --- a/cpp/src/interop/to_arrow_schema.cpp +++ b/cpp/src/interop/to_arrow_schema.cpp @@ -185,9 +185,11 @@ int dispatch_to_arrow_type::operator()(column_view input, { cudf::dictionary_column_view const dview{input}; - NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(out, id_to_arrow_type(dview.indices().type().id()))); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType( + out, dview.is_empty() ? NANOARROW_TYPE_INT32 : id_to_arrow_type(dview.indices().type().id()))); NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateDictionary(out)); ArrowSchemaInit(out->dictionary); + if (dview.is_empty()) { return ArrowSchemaSetType(out->dictionary, NANOARROW_TYPE_INT64); } auto dict_keys = dview.keys(); return cudf::type_dispatcher( diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 7fcb1478196..4441b51f7c8 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -356,6 +356,18 @@ TEST_F(ToArrowDeviceTest, EmptyTable) compare_arrays(schema.get(), arr.get(), &got_arrow_device->array); } +TEST_F(ToArrowDeviceTest, EmptyDictionary) +{ + auto empty = cudf::make_empty_column(cudf::type_id::DICTIONARY32); + auto meta = std::vector({cudf::column_metadata{"d"}}); + + auto arrow_schema = cudf::to_arrow_schema(cudf::table_view({empty->view()}), meta); + ASSERT_EQ(arrow_schema->n_children, 1); + auto dictionary = arrow_schema->children[0]->dictionary; + ASSERT_NE(dictionary, nullptr); + EXPECT_EQ(dictionary->n_children, 0); +} + TEST_F(ToArrowDeviceTest, DateTimeTable) { auto data = {1, 2, 3, 4, 5, 6};