Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into cudftestutil-followup
Browse files Browse the repository at this point in the history
  • Loading branch information
lamarrr authored Mar 1, 2025
2 parents 10d013f + 83a29ce commit a895461
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 2 deletions.
12 changes: 11 additions & 1 deletion cpp/include/cudf/interop.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -327,6 +327,8 @@ unique_device_array_t to_arrow_host(
*
* @throws cudf::data_type_error if the input array is not a struct array.
*
* @throws std::overflow_error if the input arrow object exceeds the column size limit.
*
* The conversion will not call release on the input Array.
*
* @param schema `ArrowSchema` pointer to describe the type of the data
Expand Down Expand Up @@ -367,6 +369,8 @@ std::unique_ptr<cudf::column> from_arrow_column(
*
* @throws std::invalid_argument if the device_type is not `ARROW_DEVICE_CPU`
*
* @throws std::overflow_error if the input arrow object exceeds the column size limit.
*
* @throws cudf::data_type_error if the input array is not a struct array;
* non-struct arrays should be passed to `from_arrow_host_column` instead.
*
Expand Down Expand Up @@ -411,6 +415,8 @@ std::unique_ptr<table> from_arrow_stream(
*
* @throws cudf::data_type_error if input arrow data type is not supported in cudf.
*
* @throws std::overflow_error if the input arrow object exceeds the column size limit.
*
* The conversion will not call release on the input Array.
*
* @param schema `ArrowSchema` pointer to describe the type of the data
Expand Down Expand Up @@ -483,6 +489,8 @@ using unique_table_view_t =
*
* @throws cudf::data_type_error if the input arrow data type is not supported.
*
* @throws std::overflow_error if the input arrow object exceeds the column size limit.
*
* Each child of the input struct will be a column of the resulting table_view.
*
* @note The custom deleter used for the unique_ptr to the table_view maintains ownership
Expand Down Expand Up @@ -528,6 +536,8 @@ using unique_column_view_t =
*
* @throws cudf::data_type_error if the input arrow data type is not supported.
*
* @throws std::overflow_error if the input arrow object exceeds the column size limit.
*
* @note The custom deleter used for the unique_ptr to the column_view maintains ownership
* over any memory which is allocated, such as converting boolean columns from the bitmap
* used by Arrow to the 1-byte per value for cudf.
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/interop/from_arrow_device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
#include <nanoarrow/nanoarrow.hpp>
#include <nanoarrow/nanoarrow_device.h>

#include <cstdint>
#include <limits>
#include <stdexcept>

namespace cudf {

namespace detail {
Expand Down Expand Up @@ -317,6 +321,11 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_EXPECTS(
input->length <= static_cast<std::int64_t>(std::numeric_limits<cudf::size_type>::max()),
"Total number of rows in Arrow column exceeds the column size limit.",
std::overflow_error);

return type.id() != type_id::EMPTY
? std::move(type_dispatcher(
type, dispatch_from_arrow_device{}, schema, input, type, skip_mask, stream, mr))
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/interop/from_arrow_host.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
#include <nanoarrow/nanoarrow.hpp>
#include <nanoarrow/nanoarrow_device.h>

#include <cstdint>
#include <limits>
#include <stdexcept>

namespace cudf {
namespace detail {

Expand Down Expand Up @@ -381,6 +385,11 @@ std::unique_ptr<column> get_column_copy(ArrowSchemaView* schema,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_EXPECTS(
input->length <= static_cast<std::int64_t>(std::numeric_limits<cudf::size_type>::max()),
"Total number of rows in Arrow column exceeds the column size limit.",
std::overflow_error);

return type.id() != type_id::EMPTY
? std::move(type_dispatcher(
type, dispatch_copy_from_arrow_host{stream, mr}, schema, input, type, skip_mask))
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/interop/from_arrow_stream.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -121,6 +121,7 @@ std::unique_ptr<table> from_arrow_stream(ArrowArrayStream* input,

schema.release(&schema);

if (chunks.size() == 1) { return std::move(chunks[0]); }
auto chunk_views = std::vector<table_view>{};
chunk_views.reserve(chunks.size());
std::transform(
Expand Down

0 comments on commit a895461

Please sign in to comment.