diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh index d1f7b4d188..047ee9ffc5 100644 --- a/c++/include/orc/Writer.hh +++ b/c++/include/orc/Writer.hh @@ -77,6 +77,8 @@ namespace orc { /** * Set the data compression block size. + * Must be less than 1 << 23 bytes (8M), a limit imposed by the 3-byte + * compression block header (1 bit for isOriginal and 23 bits for length). */ WriterOptions& setCompressionBlockSize(uint64_t size); diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc index f485e78d99..b0d0b0c59a 100644 --- a/c++/src/Writer.cc +++ b/c++/src/Writer.cc @@ -338,6 +338,12 @@ namespace orc { const WriterId WriterImpl::writerId = WriterId::ORC_CPP_WRITER; + static void validateOptions(const WriterOptions& opts) { + if (opts.getCompressionBlockSize() >= (1 << 23)) { /* must fit the 23-bit length field */ + throw std::invalid_argument("Compression block size cannot be greater or equal than 8M"); + } + } + WriterImpl::WriterImpl(const Type& t, OutputStream* stream, const WriterOptions& opts) : outStream(stream), options(opts), type(t) { streamsFactory = createStreamsFactory(options, outStream); @@ -347,6 +353,8 @@ namespace orc { stripesAtLastFlush = 0; lastFlushOffset = 0; + validateOptions(opts); + useTightNumericVector = opts.getUseTightNumericVector(); // compression stream for stripe footer, file footer and metadata diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc index dcc7f49a0f..28090cb130 100644 --- a/c++/test/TestWriter.cc +++ b/c++/test/TestWriter.cc @@ -16,6 +16,7 @@ * limitations under the License. 
*/ +#include "gtest/gtest.h" #include "orc/ColumnPrinter.hh" #include "orc/OrcFile.hh" @@ -29,6 +30,7 @@ #include #include #include +#include #ifdef __clang__ DIAGNOSTIC_IGNORE("-Wmissing-variable-declarations") @@ -2191,6 +2193,24 @@ namespace orc { } } + TEST_P(WriterTest, testValidateOptions) { /* block size must be < (1 << 23) */ + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); + MemoryPool* pool = getDefaultPool(); + std::unique_ptr type(Type::buildTypeFromString("struct")); + + uint64_t stripeSize = 16 * 1024; // 16K + auto compressionKind = CompressionKind_NONE; + + EXPECT_NO_THROW(createWriter(stripeSize, /* compressionBlockSize*/ (1 << 23) - 1, + compressionKind, *type, pool, &memStream, fileVersion)); + EXPECT_THROW(createWriter(stripeSize, /* compressionBlockSize*/ (1 << 23), compressionKind, + *type, pool, &memStream, fileVersion), + std::invalid_argument); /* exactly at the limit */ + EXPECT_THROW(createWriter(stripeSize, /* compressionBlockSize*/ (1 << 23) + 1, compressionKind, + *type, pool, &memStream, fileVersion), + std::invalid_argument); /* past the limit */ + } + INSTANTIATE_TEST_SUITE_P(OrcTest, WriterTest, Values(FileVersion::v_0_11(), FileVersion::v_0_12(), FileVersion::UNSTABLE_PRE_2_0()));