diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 0033d89fc40fc..a50dcdead2c97 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -363,10 +363,6 @@ struct CastStruct { const auto& in_field = in_type.field(in_field_index); // If there are more in_fields check if they match the out_field. if (in_field->name() == out_field->name()) { - if (in_field->nullable() && !out_field->nullable()) { - return Status::TypeError("cannot cast nullable field to non-nullable field: ", - in_type.ToString(), " ", out_type.ToString()); - } // Found matching in_field and out_field. fields_to_select[out_field_index++] = in_field_index; // Using the same in_field for multiple out_fields is not allowed. @@ -403,17 +399,27 @@ struct CastStruct { } int out_field_index = 0; - for (int field_index : fields_to_select) { - const auto& target_type = out->type()->field(out_field_index++)->type(); - if (field_index == kFillNullSentinel) { - ARROW_ASSIGN_OR_RAISE(auto nulls, - MakeArrayOfNull(target_type->GetSharedPtr(), batch.length)); + for (int in_field_index : fields_to_select) { + const auto& out_field = out_type.field(out_field_index++); + const auto& out_field_type = out_field->type(); + if (in_field_index == kFillNullSentinel) { + ARROW_ASSIGN_OR_RAISE( + auto nulls, MakeArrayOfNull(out_field_type->GetSharedPtr(), batch.length, + ctx->memory_pool())); out_array->child_data.push_back(nulls->data()); } else { - const auto& values = (in_array.child_data[field_index].ToArrayData()->Slice( + const auto& in_field = in_type.field(in_field_index); + const auto& in_values = (in_array.child_data[in_field_index].ToArrayData()->Slice( in_array.offset, in_array.length)); - ARROW_ASSIGN_OR_RAISE(Datum cast_values, - Cast(values, target_type, options, ctx->exec_context())); + if (in_field->nullable() && !out_field->nullable() && + in_values->GetNullCount() > 0) { + return Status::Invalid( + "field '", in_field->name(), "' of type ", in_field->type()->ToString(), + " has nulls. Can't cast to non-nullable field '", out_field->name(), + "' of type ", out_field_type->ToString()); + } + ARROW_ASSIGN_OR_RAISE(Datum cast_values, Cast(in_values, out_field_type, options, + ctx->exec_context())); DCHECK(cast_values.is_array()); out_array->child_data.push_back(cast_values.array()); } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index d7f73e2bb7bfe..528e6acded618 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -4076,92 +4076,95 @@ TEST(Cast, StructToBiggerNullableStruct) { TEST(Cast, StructToDifferentNullabilityStruct) { { // OK to go from non-nullable to nullable... - std::vector> fields_src_non_nullable = { + std::vector> fields_src = { std::make_shared("a", int8(), false), std::make_shared("b", int8(), false), std::make_shared("c", int8(), false)}; - std::shared_ptr a_src_non_nullable, b_src_non_nullable, c_src_non_nullable; - a_src_non_nullable = ArrayFromJSON(int8(), "[11, 23, 56]"); - b_src_non_nullable = ArrayFromJSON(int8(), "[32, 46, 37]"); - c_src_non_nullable = ArrayFromJSON(int8(), "[95, 11, 44]"); - ASSERT_OK_AND_ASSIGN( - auto src_non_nullable, - StructArray::Make({a_src_non_nullable, b_src_non_nullable, c_src_non_nullable}, - fields_src_non_nullable)); - - std::shared_ptr a_dest_nullable, b_dest_nullable, c_dest_nullable; - a_dest_nullable = ArrayFromJSON(int64(), "[11, 23, 56]"); - b_dest_nullable = ArrayFromJSON(int64(), "[32, 46, 37]"); - c_dest_nullable = ArrayFromJSON(int64(), "[95, 11, 44]"); - - std::vector> fields_dest1_nullable = { + std::vector> arrays_src = { + ArrayFromJSON(int8(), "[11, 23, 56]"), + ArrayFromJSON(int8(), "[32, 46, 37]"), + ArrayFromJSON(int8(), "[95, 11, 44]"), + }; + ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make(arrays_src, fields_src)); + + std::vector> fields_dest = { std::make_shared("a", int64(), true), std::make_shared("b", int64(), true), - std::make_shared("c", int64(), true)}; - ASSERT_OK_AND_ASSIGN( - auto dest1_nullable, - StructArray::Make({a_dest_nullable, b_dest_nullable, c_dest_nullable}, - fields_dest1_nullable)); - CheckCast(src_non_nullable, dest1_nullable); - - std::vector> fields_dest2_nullable = { - std::make_shared("a", int64(), true), - std::make_shared("c", int64(), true)}; - ASSERT_OK_AND_ASSIGN( - auto dest2_nullable, - StructArray::Make({a_dest_nullable, c_dest_nullable}, fields_dest2_nullable)); - CheckCast(src_non_nullable, dest2_nullable); - - std::vector> fields_dest3_nullable = { - std::make_shared("b", int64(), true)}; - ASSERT_OK_AND_ASSIGN(auto dest3_nullable, - StructArray::Make({b_dest_nullable}, fields_dest3_nullable)); - CheckCast(src_non_nullable, dest3_nullable); + std::make_shared("c", int64(), true), + }; + std::vector> arrays_dest = { + ArrayFromJSON(int64(), "[11, 23, 56]"), + ArrayFromJSON(int64(), "[32, 46, 37]"), + ArrayFromJSON(int64(), "[95, 11, 44]"), + }; + ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make(arrays_dest, fields_dest)); + CheckCast(src, dest); + + std::vector> fields_dest_ac = {fields_dest[0], fields_dest[2]}; + std::vector> arrays_dest_ac = {arrays_dest[0], arrays_dest[2]}; + ASSERT_OK_AND_ASSIGN(auto dest_ac, StructArray::Make(arrays_dest_ac, fields_dest_ac)); + CheckCast(src, dest_ac); + + std::vector> fields_dest_b = {fields_dest[1]}; + std::vector> arrays_dest_b = {arrays_dest[1]}; + ASSERT_OK_AND_ASSIGN(auto dest_b, StructArray::Make(arrays_dest_b, fields_dest_b)); + CheckCast(src, dest_b); } { - // But NOT OK to go from nullable to non-nullable... - std::vector> fields_src_nullable = { + // But when going from nullable to non-nullable, all data must be non-null... + std::vector> fields_src = { std::make_shared("a", int8(), true), std::make_shared("b", int8(), true), std::make_shared("c", int8(), true)}; - std::shared_ptr a_src_nullable, b_src_nullable, c_src_nullable; - a_src_nullable = ArrayFromJSON(int8(), "[1, null, 5]"); - b_src_nullable = ArrayFromJSON(int8(), "[3, 4, null]"); - c_src_nullable = ArrayFromJSON(int8(), "[9, 11, 44]"); - ASSERT_OK_AND_ASSIGN( - auto src_nullable, - StructArray::Make({a_src_nullable, b_src_nullable, c_src_nullable}, - fields_src_nullable)); - - std::vector> fields_dest1_non_nullable = { + std::vector> arrays_src = { + ArrayFromJSON(int8(), "[1, null, 5]"), + ArrayFromJSON(int8(), "[3, 4, null]"), + ArrayFromJSON(int8(), "[9, 11, 44]"), + }; + ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make(arrays_src, fields_src)); + + std::vector> fields_dest = { std::make_shared("a", int64(), false), std::make_shared("b", int64(), false), std::make_shared("c", int64(), false)}; - const auto dest1_non_nullable = arrow::struct_(fields_dest1_non_nullable); - const auto options1_non_nullable = CastOptions::Safe(dest1_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), - Cast(src_nullable, options1_non_nullable)); + Invalid, + ::testing::HasSubstr( + "field 'a' of type int8 has nulls. Can't cast to non-nullable field 'a' " + "of type int64"), + Cast(src, CastOptions::Safe(arrow::struct_(fields_dest)))); - std::vector> fields_dest2_non_nullable = { - std::make_shared("a", int64(), false), - std::make_shared("c", int64(), false)}; - const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullable); - const auto options2_non_nullable = CastOptions::Safe(dest2_non_nullable); + std::vector> fields_dest_ac = {fields_dest[0], fields_dest[2]}; EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), - Cast(src_nullable, options2_non_nullable)); - - std::vector> fields_dest3_non_nullable = { - std::make_shared("c", int64(), false)}; - const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullable); - const auto options3_non_nullable = CastOptions::Safe(dest3_non_nullable); + Invalid, + ::testing::HasSubstr( + "field 'a' of type int8 has nulls. Can't cast to non-nullable field 'a' " + "of type int64"), + Cast(src, CastOptions::Safe(arrow::struct_(fields_dest_ac)))); + + // if we only select a field with no nulls, it should be fine: + std::vector> fields_dest_c = {fields_dest[2]}; + std::vector> arrays_dest_c = { + ArrayFromJSON(int64(), "[9, 11, 44]")}; + ASSERT_OK_AND_ASSIGN(auto dest_c, StructArray::Make(arrays_dest_c, fields_dest_c)); + CheckCast(src, dest_c); + + // A slice that doesn't contain nulls is castable... + std::vector> arrays_dest_0 = { + ArrayFromJSON(int64(), "[1]"), + ArrayFromJSON(int64(), "[3]"), + ArrayFromJSON(int64(), "[9]"), + }; + ASSERT_OK_AND_ASSIGN(auto dest_0, StructArray::Make(arrays_dest_0, fields_dest)); + CheckCast(src->Slice(0, 1), dest_0); + + // ...but a slice that contains nulls will error. EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), - Cast(src_nullable, options3_non_nullable)); + Invalid, + ::testing::HasSubstr( + "field 'a' of type int8 has nulls. Can't cast to non-nullable field 'a' " + "of type int64"), + Cast(src->Slice(1, 3), CastOptions::Safe(arrow::struct_(fields_dest)))); } }