From beb13da8605126871a43fd6a43953d85a360a817 Mon Sep 17 00:00:00 2001 From: ffacs Date: Wed, 31 Jan 2024 08:52:32 -0800 Subject: [PATCH] ORC-1602: [C++] limit compression block size limit compression block size on c++ side. to fix https://github.com/apache/orc/issues/1727 UT passed NO Closes #1774 from ffacs/ORC-1602. Authored-by: ffacs Signed-off-by: Dongjoon Hyun --- c++/include/orc/Writer.hh | 2 ++ c++/src/Writer.cc | 3 +++ c++/test/TestWriter.cc | 12 +++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh index 78b0b97d25..a4589595ae 100644 --- a/c++/include/orc/Writer.hh +++ b/c++/include/orc/Writer.hh @@ -73,6 +73,8 @@ namespace orc { /** * Set the data compression block size. + * Must be less than 1 << 23 bytes (8M), a limit imposed by the + * 3-byte compression block header (1 bit for isOriginal and 23 bits for length). */ WriterOptions& setCompressionBlockSize(uint64_t size); diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc index b5ae6b74d8..35b8bb86d1 100644 --- a/c++/src/Writer.cc +++ b/c++/src/Writer.cc @@ -110,6 +110,9 @@ namespace orc { } WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) { + if (size >= (1 << 23)) { + throw std::invalid_argument("Compression block size cannot be greater or equal than 8M"); + } privateBits->compressionBlockSize = size; return *this; } diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc index 506887e4f2..46b004627a 100644 --- a/c++/test/TestWriter.cc +++ b/c++/test/TestWriter.cc @@ -1901,5 +1901,15 @@ namespace orc { } } + TEST_P(WriterTest, testValidateOptions) { + WriterOptions options; + constexpr uint64_t compressionBlockSizeThreshold = (1 << 23) - 1; + EXPECT_NO_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold)); + EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 1), + std::invalid_argument); + 
EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 2), + std::invalid_argument); + } + INSTANTIATE_TEST_CASE_P(OrcTest, WriterTest, Values(FileVersion::v_0_11(), FileVersion::v_0_12())); -} +} // namespace orc