Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory use when writing tables with very short columns to ORC #18136

Open
wants to merge 10 commits into
base: branch-25.04
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2226,6 +2226,22 @@ stripe_dictionaries build_dictionaries(orc_table_view& orc_table,
std::move(dict_order_owner)};
}

/**
 * @brief Finds the size of the largest stream among the given stream descriptors.
 *
 * Searches all `ss.count()` descriptors with `thrust::max_element`, ordering them by
 * `stream_size`, then copies the selected descriptor to the host to read its size.
 *
 * @param ss 2D span of stream descriptors to search
 * @param stream CUDA stream used for the search and for the copy to host
 * @return `stream_size` of the largest stream, or zero if `ss` contains no descriptors
 */
[[nodiscard]] uint32_t find_largest_stream_size(device_2dspan<stripe_stream const> ss,
                                                rmm::cuda_stream_view stream)
{
  // For an empty range max_element returns the end pointer; copying one element from there
  // would read past the end of the descriptor buffer, so report a zero size instead.
  if (ss.count() == 0) { return 0; }

  auto const longest_stream = thrust::max_element(
    rmm::exec_policy(stream),
    ss.data(),
    ss.data() + ss.count(),
    cuda::proclaim_return_type<bool>([] __device__(auto const& lhs, auto const& rhs) {
      return lhs.stream_size < rhs.stream_size;
    }));

  // Only the single winning descriptor is copied to the host, not the whole array.
  auto const h_longest_stream = cudf::detail::make_host_vector_sync(
    device_span<stripe_stream const>{longest_stream, 1}, stream);
  return h_longest_stream[0].stream_size;
}

/**
* @brief Perform the processing steps needed to convert the input table into the output ORC data
* for writing, such as compression and ORC encoding.
Expand Down Expand Up @@ -2319,7 +2335,9 @@ auto convert_table_to_orc_data(table_view const& input,
size_t compressed_bfr_size = 0;
size_t num_compressed_blocks = 0;

auto const max_compressed_block_size = max_compressed_size(compression, compression_blocksize);
auto const largest_stream_size = find_largest_stream_size(strm_descs, stream);
auto const max_compressed_block_size =
max_compressed_size(compression, std::min<size_t>(largest_stream_size, compression_blocksize));
auto const padded_max_compressed_block_size =
util::round_up_unsafe<size_t>(max_compressed_block_size, block_align);
auto const padded_block_header_size =
Expand Down
1 change: 1 addition & 0 deletions cpp/src/utilities/host_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
namespace cudf {

namespace {

class fixed_pinned_pool_memory_resource {
using upstream_mr = rmm::mr::pinned_host_memory_resource;
using host_pooled_mr = rmm::mr::pool_memory_resource<upstream_mr>;
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ ConfigureTest(
ConfigureTest(
ORC_TEST io/orc_chunked_reader_test.cu io/orc_test.cpp
GPUS 1
PERCENT 30
PERCENT 100
)
ConfigureTest(
PARQUET_TEST
Expand Down Expand Up @@ -340,7 +340,7 @@ ConfigureTest(JSON_TREE_CSR io/json/json_tree_csr.cu)
ConfigureTest(
DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp
GPUS 1
PERCENT 30
PERCENT 100
)
target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB)
ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu)
Expand Down
Loading