diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000000..fbb059c2ee2 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,16 @@ +Checks: "-*, + readability-identifier-naming, +" + +CheckOptions: + [ + { key: readability-identifier-naming.PrivateMemberSuffix, value: "_" }, + { key: readability-identifier-naming.ProtectedMemberSuffix, value: "" }, + { key: readability-identifier-naming.PublicMemberSuffix, value: "" }, + { key: readability-identifier-naming.ParameterCase, value: "camelBack" }, + { key: readability-identifier-naming.ParameterIgnoredRegexp, value: "^[a-zA-Z]$" }, + ] + +WarningsAsErrors: '' +HeaderFilterRegex: '.*' +FormatStyle: none \ No newline at end of file diff --git a/.gitignore b/.gitignore index c0a4d44e074..2ff46e96942 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ dependency-reduced-pom.xml .java-version java/bench/data *.swp +.cache/* diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh index 9da67a3f199..f3cfc643395 100644 --- a/c++/include/orc/Common.hh +++ b/c++/include/orc/Common.hh @@ -29,32 +29,32 @@ namespace orc { class FileVersion { private: - uint32_t majorVersion; - uint32_t minorVersion; + uint32_t majorVersion_; + uint32_t minorVersion_; public: static const FileVersion& v_0_11(); static const FileVersion& v_0_12(); static const FileVersion& UNSTABLE_PRE_2_0(); - FileVersion(uint32_t major, uint32_t minor) : majorVersion(major), minorVersion(minor) {} + FileVersion(uint32_t major, uint32_t minor) : majorVersion_(major), minorVersion_(minor) {} /** * Get major version */ uint32_t getMajor() const { - return this->majorVersion; + return this->majorVersion_; } /** * Get minor version */ uint32_t getMinor() const { - return this->minorVersion; + return this->minorVersion_; } bool operator==(const FileVersion& right) const { - return this->majorVersion == right.getMajor() && this->minorVersion == right.getMinor(); + return this->majorVersion_ == right.getMajor() && this->minorVersion_ == 
right.getMinor(); } bool operator!=(const FileVersion& right) const { diff --git a/c++/include/orc/Exceptions.hh b/c++/include/orc/Exceptions.hh index 0536dbd164a..97cf5d8a0df 100644 --- a/c++/include/orc/Exceptions.hh +++ b/c++/include/orc/Exceptions.hh @@ -28,8 +28,8 @@ namespace orc { class NotImplementedYet : public std::logic_error { public: - explicit NotImplementedYet(const std::string& what_arg); - explicit NotImplementedYet(const char* what_arg); + explicit NotImplementedYet(const std::string& whatArg); + explicit NotImplementedYet(const char* whatArg); ~NotImplementedYet() noexcept override; NotImplementedYet(const NotImplementedYet&); @@ -39,8 +39,8 @@ namespace orc { class ParseError : public std::runtime_error { public: - explicit ParseError(const std::string& what_arg); - explicit ParseError(const char* what_arg); + explicit ParseError(const std::string& whatArg); + explicit ParseError(const char* whatArg); ~ParseError() noexcept override; ParseError(const ParseError&); @@ -50,8 +50,8 @@ namespace orc { class InvalidArgument : public std::runtime_error { public: - explicit InvalidArgument(const std::string& what_arg); - explicit InvalidArgument(const char* what_arg); + explicit InvalidArgument(const std::string& whatArg); + explicit InvalidArgument(const char* whatArg); ~InvalidArgument() noexcept override; InvalidArgument(const InvalidArgument&); @@ -61,8 +61,8 @@ namespace orc { class SchemaEvolutionError : public std::logic_error { public: - explicit SchemaEvolutionError(const std::string& what_arg); - explicit SchemaEvolutionError(const char* what_arg); + explicit SchemaEvolutionError(const std::string& whatArg); + explicit SchemaEvolutionError(const char* whatArg); virtual ~SchemaEvolutionError() noexcept override; SchemaEvolutionError(const SchemaEvolutionError&); SchemaEvolutionError& operator=(const SchemaEvolutionError&) = delete; diff --git a/c++/include/orc/Int128.hh b/c++/include/orc/Int128.hh index bcb4a58e22c..6954c771cf1 100644 --- 
a/c++/include/orc/Int128.hh +++ b/c++/include/orc/Int128.hh @@ -37,8 +37,8 @@ namespace orc { class Int128 { public: Int128() { - highbits = 0; - lowbits = 0; + highbits_ = 0; + lowbits_ = 0; } /** @@ -46,11 +46,11 @@ namespace orc { */ Int128(int64_t right) { if (right >= 0) { - highbits = 0; - lowbits = static_cast(right); + highbits_ = 0; + lowbits_ = static_cast(right); } else { - highbits = -1; - lowbits = static_cast(right); + highbits_ = -1; + lowbits_ = static_cast(right); } } @@ -58,8 +58,8 @@ namespace orc { * Create from the twos complement representation. */ Int128(int64_t high, uint64_t low) { - highbits = high; - lowbits = low; + highbits_ = high; + lowbits_ = low; } /** @@ -78,16 +78,16 @@ namespace orc { static Int128 minimumValue(); Int128& negate() { - lowbits = ~lowbits + 1; - highbits = ~highbits; - if (lowbits == 0) { - highbits += 1; + lowbits_ = ~lowbits_ + 1; + highbits_ = ~highbits_; + if (lowbits_ == 0) { + highbits_ += 1; } return *this; } Int128& abs() { - if (highbits < 0) { + if (highbits_ < 0) { negate(); } return *this; @@ -100,8 +100,8 @@ namespace orc { } Int128& invert() { - lowbits = ~lowbits; - highbits = ~highbits; + lowbits_ = ~lowbits_; + highbits_ = ~highbits_; return *this; } @@ -111,12 +111,12 @@ namespace orc { * @return *this */ Int128& operator+=(const Int128& right) { - uint64_t sum = lowbits + right.lowbits; - highbits += right.highbits; - if (sum < lowbits) { - highbits += 1; + uint64_t sum = lowbits_ + right.lowbits_; + highbits_ += right.highbits_; + if (sum < lowbits_) { + highbits_ += 1; } - lowbits = sum; + lowbits_ = sum; return *this; } @@ -126,12 +126,12 @@ namespace orc { * @return *this */ Int128& operator-=(const Int128& right) { - uint64_t diff = lowbits - right.lowbits; - highbits -= right.highbits; - if (diff > lowbits) { - highbits -= 1; + uint64_t diff = lowbits_ - right.lowbits_; + highbits_ -= right.highbits_; + if (diff > lowbits_) { + highbits_ -= 1; } - lowbits = diff; + lowbits_ = diff; return 
*this; } @@ -162,8 +162,8 @@ namespace orc { * @return *this */ Int128& operator|=(const Int128& right) { - lowbits |= right.lowbits; - highbits |= right.highbits; + lowbits_ |= right.lowbits_; + highbits_ |= right.highbits_; return *this; } @@ -173,8 +173,8 @@ namespace orc { * @return *this */ Int128& operator&=(const Int128& right) { - lowbits &= right.lowbits; - highbits &= right.highbits; + lowbits_ &= right.lowbits_; + highbits_ &= right.highbits_; return *this; } @@ -196,15 +196,15 @@ namespace orc { Int128& operator<<=(uint32_t bits) { if (bits != 0) { if (bits < 64) { - highbits <<= bits; - highbits |= (lowbits >> (64 - bits)); - lowbits <<= bits; + highbits_ <<= bits; + highbits_ |= (lowbits_ >> (64 - bits)); + lowbits_ <<= bits; } else if (bits < 128) { - highbits = static_cast(lowbits) << (bits - 64); - lowbits = 0; + highbits_ = static_cast(lowbits_) << (bits - 64); + lowbits_ = 0; } else { - highbits = 0; - lowbits = 0; + highbits_ = 0; + lowbits_ = 0; } } return *this; @@ -217,74 +217,74 @@ namespace orc { Int128& operator>>=(uint32_t bits) { if (bits != 0) { if (bits < 64) { - lowbits >>= bits; - lowbits |= static_cast(highbits << (64 - bits)); - highbits = static_cast(static_cast(highbits) >> bits); + lowbits_ >>= bits; + lowbits_ |= static_cast(highbits_ << (64 - bits)); + highbits_ = static_cast(static_cast(highbits_) >> bits); } else if (bits < 128) { - lowbits = static_cast(highbits >> (bits - 64)); - highbits = highbits >= 0 ? 0 : -1l; + lowbits_ = static_cast(highbits_ >> (bits - 64)); + highbits_ = highbits_ >= 0 ? 0 : -1l; } else { - highbits = highbits >= 0 ? 0 : -1l; - lowbits = static_cast(highbits); + highbits_ = highbits_ >= 0 ? 
0 : -1l; + lowbits_ = static_cast(highbits_); } } return *this; } bool operator==(const Int128& right) const { - return highbits == right.highbits && lowbits == right.lowbits; + return highbits_ == right.highbits_ && lowbits_ == right.lowbits_; } bool operator!=(const Int128& right) const { - return highbits != right.highbits || lowbits != right.lowbits; + return highbits_ != right.highbits_ || lowbits_ != right.lowbits_; } bool operator<(const Int128& right) const { - if (highbits == right.highbits) { - return lowbits < right.lowbits; + if (highbits_ == right.highbits_) { + return lowbits_ < right.lowbits_; } else { - return highbits < right.highbits; + return highbits_ < right.highbits_; } } bool operator<=(const Int128& right) const { - if (highbits == right.highbits) { - return lowbits <= right.lowbits; + if (highbits_ == right.highbits_) { + return lowbits_ <= right.lowbits_; } else { - return highbits <= right.highbits; + return highbits_ <= right.highbits_; } } bool operator>(const Int128& right) const { - if (highbits == right.highbits) { - return lowbits > right.lowbits; + if (highbits_ == right.highbits_) { + return lowbits_ > right.lowbits_; } else { - return highbits > right.highbits; + return highbits_ > right.highbits_; } } bool operator>=(const Int128& right) const { - if (highbits == right.highbits) { - return lowbits >= right.lowbits; + if (highbits_ == right.highbits_) { + return lowbits_ >= right.lowbits_; } else { - return highbits >= right.highbits; + return highbits_ >= right.highbits_; } } uint32_t hash() const { - return static_cast(highbits >> 32) ^ static_cast(highbits) ^ - static_cast(lowbits >> 32) ^ static_cast(lowbits); + return static_cast(highbits_ >> 32) ^ static_cast(highbits_) ^ + static_cast(lowbits_ >> 32) ^ static_cast(lowbits_); } /** * Does this value fit into a long? 
*/ bool fitsInLong() const { - switch (highbits) { + switch (highbits_) { case 0: - return 0 == (lowbits & LONG_SIGN_BIT); + return 0 == (lowbits_ & LONG_SIGN_BIT); case -1: - return 0 != (lowbits & LONG_SIGN_BIT); + return 0 != (lowbits_ & LONG_SIGN_BIT); default: return false; } @@ -295,7 +295,7 @@ namespace orc { */ int64_t toLong() const { if (fitsInLong()) { - return static_cast(lowbits); + return static_cast(lowbits_); } throw std::range_error("Int128 too large to convert to long"); } @@ -331,14 +331,14 @@ namespace orc { * Get the high bits of the twos complement representation of the number. */ int64_t getHighBits() const { - return highbits; + return highbits_; } /** * Get the low bits of the twos complement representation of the number. */ uint64_t getLowBits() const { - return lowbits; + return lowbits_; } /** @@ -352,8 +352,8 @@ namespace orc { private: static const uint64_t LONG_SIGN_BIT = 0x8000000000000000u; - int64_t highbits; - uint64_t lowbits; + int64_t highbits_; + uint64_t lowbits_; }; /** diff --git a/c++/include/orc/MemoryPool.hh b/c++/include/orc/MemoryPool.hh index 6d999d3aa8b..a914e5f260c 100644 --- a/c++/include/orc/MemoryPool.hh +++ b/c++/include/orc/MemoryPool.hh @@ -36,50 +36,50 @@ namespace orc { template class DataBuffer { private: - MemoryPool& memoryPool; - T* buf; + MemoryPool& memoryPool_; + T* buf_; // current size - uint64_t currentSize; + uint64_t currentSize_; // maximal capacity (actual allocated memory) - uint64_t currentCapacity; + uint64_t currentCapacity_; // not implemented DataBuffer(DataBuffer& buffer); DataBuffer& operator=(DataBuffer& buffer); public: - DataBuffer(MemoryPool& pool, uint64_t _size = 0); + DataBuffer(MemoryPool& pool, uint64_t size = 0); DataBuffer(DataBuffer&& buffer) noexcept; virtual ~DataBuffer(); T* data() { - return buf; + return buf_; } const T* data() const { - return buf; + return buf_; } uint64_t size() const { - return currentSize; + return currentSize_; } uint64_t capacity() const { - 
return currentCapacity; + return currentCapacity_; } const T& operator[](uint64_t i) const { - return buf[i]; + return buf_[i]; } T& operator[](uint64_t i) { - return buf[i]; + return buf_[i]; } - void reserve(uint64_t _size); - void resize(uint64_t _size); + void reserve(uint64_t size); + void resize(uint64_t size); void zeroOut(); }; diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh index b631c2c6ea0..4b254593ee8 100644 --- a/c++/include/orc/Reader.hh +++ b/c++/include/orc/Reader.hh @@ -67,7 +67,7 @@ namespace orc { */ class ReaderOptions { private: - std::unique_ptr privateBits; + std::unique_ptr privateBits_; public: ReaderOptions(); @@ -145,7 +145,7 @@ namespace orc { */ class RowReaderOptions { private: - std::unique_ptr privateBits; + std::unique_ptr privateBits_; public: RowReaderOptions(); diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh index 047ee9ffc52..7968fbce7f7 100644 --- a/c++/include/orc/Writer.hh +++ b/c++/include/orc/Writer.hh @@ -55,7 +55,7 @@ namespace orc { */ class WriterOptions { private: - std::unique_ptr privateBits; + std::unique_ptr privateBits_; public: WriterOptions(); diff --git a/c++/include/orc/sargs/Literal.hh b/c++/include/orc/sargs/Literal.hh index 9ce958302da..8366ce7a4a1 100644 --- a/c++/include/orc/sargs/Literal.hh +++ b/c++/include/orc/sargs/Literal.hh @@ -39,7 +39,7 @@ namespace orc { Timestamp(const Timestamp&) = default; Timestamp(Timestamp&&) = default; ~Timestamp() = default; - Timestamp(int64_t second_, int32_t nanos_) : second(second_), nanos(nanos_) { + Timestamp(int64_t secondValue, int32_t nanoValue) : second(secondValue), nanos(nanoValue) { // PASS } Timestamp& operator=(const Timestamp&) = default; @@ -130,15 +130,15 @@ namespace orc { * Check if a literal is null */ bool isNull() const { - return mIsNull; + return mIsNull_; } PredicateDataType getType() const { - return mType; + return mType_; } std::string toString() const; size_t getHashCode() const { - return mHashCode; + 
return mHashCode_; } private: @@ -158,13 +158,13 @@ namespace orc { }; private: - LiteralVal mValue; // data value for this literal if not null - PredicateDataType mType; // data type of the literal - size_t mSize; // size of mValue if it is Buffer - int32_t mPrecision; // precision of decimal type - int32_t mScale; // scale of decimal type - bool mIsNull; // whether this literal is null - size_t mHashCode; // precomputed hash code for the literal + LiteralVal mValue_; // data value for this literal if not null + PredicateDataType mType_; // data type of the literal + size_t mSize_; // size of mValue if it is Buffer + int32_t mPrecision_; // precision of decimal type + int32_t mScale_; // scale of decimal type + bool mIsNull_; // whether this literal is null + size_t mHashCode_; // precomputed hash code for the literal }; } // namespace orc diff --git a/c++/src/BlockBuffer.cc b/c++/src/BlockBuffer.cc index 1f7843fad7d..83aebef3da3 100644 --- a/c++/src/BlockBuffer.cc +++ b/c++/src/BlockBuffer.cc @@ -24,56 +24,56 @@ namespace orc { - BlockBuffer::BlockBuffer(MemoryPool& pool, uint64_t _blockSize) - : memoryPool(pool), currentSize(0), currentCapacity(0), blockSize(_blockSize) { - if (blockSize == 0) { + BlockBuffer::BlockBuffer(MemoryPool& pool, uint64_t blockSize) + : memoryPool_(pool), currentSize_(0), currentCapacity_(0), blockSize_(blockSize) { + if (blockSize_ == 0) { throw std::logic_error("Block size cannot be zero"); } - reserve(blockSize); + reserve(blockSize_); } BlockBuffer::~BlockBuffer() { - for (size_t i = 0; i < blocks.size(); ++i) { - memoryPool.free(blocks[i]); + for (size_t i = 0; i < blocks_.size(); ++i) { + memoryPool_.free(blocks_[i]); } - blocks.clear(); - currentSize = currentCapacity = 0; + blocks_.clear(); + currentSize_ = currentCapacity_ = 0; } BlockBuffer::Block BlockBuffer::getBlock(uint64_t blockIndex) const { if (blockIndex >= getBlockNumber()) { throw std::out_of_range("Block index out of range"); } - return Block(blocks[blockIndex], 
std::min(currentSize - blockIndex * blockSize, blockSize)); + return Block(blocks_[blockIndex], std::min(currentSize_ - blockIndex * blockSize_, blockSize_)); } BlockBuffer::Block BlockBuffer::getNextBlock() { - if (currentSize < currentCapacity) { - Block emptyBlock(blocks[currentSize / blockSize] + currentSize % blockSize, - blockSize - currentSize % blockSize); - currentSize = (currentSize / blockSize + 1) * blockSize; + if (currentSize_ < currentCapacity_) { + Block emptyBlock(blocks_[currentSize_ / blockSize_] + currentSize_ % blockSize_, + blockSize_ - currentSize_ % blockSize_); + currentSize_ = (currentSize_ / blockSize_ + 1) * blockSize_; return emptyBlock; } else { - resize(currentSize + blockSize); - return Block(blocks.back(), blockSize); + resize(currentSize_ + blockSize_); + return Block(blocks_.back(), blockSize_); } } void BlockBuffer::resize(uint64_t size) { reserve(size); - if (currentCapacity >= size) { - currentSize = size; + if (currentCapacity_ >= size) { + currentSize_ = size; } else { throw std::logic_error("Block buffer resize error"); } } void BlockBuffer::reserve(uint64_t newCapacity) { - while (currentCapacity < newCapacity) { - char* newBlockPtr = memoryPool.malloc(blockSize); + while (currentCapacity_ < newCapacity) { + char* newBlockPtr = memoryPool_.malloc(blockSize_); if (newBlockPtr != nullptr) { - blocks.push_back(newBlockPtr); - currentCapacity += blockSize; + blocks_.push_back(newBlockPtr); + currentCapacity_ += blockSize_; } else { break; } @@ -81,7 +81,7 @@ namespace orc { } void BlockBuffer::writeTo(OutputStream* output, WriterMetrics* metrics) { - if (currentSize == 0) { + if (currentSize_ == 0) { return; } static uint64_t MAX_CHUNK_SIZE = 1024 * 1024 * 1024; @@ -91,13 +91,13 @@ namespace orc { } uint64_t ioCount = 0; uint64_t blockNumber = getBlockNumber(); - // if only exists one block, currentSize is equal to first block size - if (blockNumber == 1 && currentSize <= chunkSize) { + // if only exists one block, currentSize_ 
is equal to first block size + if (blockNumber == 1 && currentSize_ <= chunkSize) { Block block = getBlock(0); output->write(block.data, block.size); ++ioCount; } else { - char* chunk = memoryPool.malloc(chunkSize); + char* chunk = memoryPool_.malloc(chunkSize); uint64_t chunkOffset = 0; for (uint64_t i = 0; i < blockNumber; ++i) { Block block = getBlock(i); @@ -121,7 +121,7 @@ namespace orc { output->write(chunk, chunkOffset); ++ioCount; } - memoryPool.free(chunk); + memoryPool_.free(chunk); } if (metrics != nullptr) { diff --git a/c++/src/BlockBuffer.hh b/c++/src/BlockBuffer.hh index 0f5f78e3fea..67c2969a95b 100644 --- a/c++/src/BlockBuffer.hh +++ b/c++/src/BlockBuffer.hh @@ -34,15 +34,15 @@ namespace orc { */ class BlockBuffer { private: - MemoryPool& memoryPool; + MemoryPool& memoryPool_; // current buffer size - uint64_t currentSize; + uint64_t currentSize_; // maximal capacity (actual allocated memory) - uint64_t currentCapacity; + uint64_t currentCapacity_; // unit for buffer expansion - const uint64_t blockSize; + const uint64_t blockSize_; // pointers to the start of each block - std::vector blocks; + std::vector blocks_; // non-copy-constructible BlockBuffer(BlockBuffer& buffer) = delete; @@ -66,7 +66,7 @@ namespace orc { uint64_t size; Block() : data(nullptr), size(0) {} - Block(char* _data, uint64_t _size) : data(_data), size(_size) {} + Block(char* dataValue, uint64_t sizeValue) : data(dataValue), size(sizeValue) {} Block(const Block& block) = default; ~Block() = default; }; @@ -94,15 +94,15 @@ namespace orc { * Get the number of blocks that are fully or partially occupied */ uint64_t getBlockNumber() const { - return (currentSize + blockSize - 1) / blockSize; + return (currentSize_ + blockSize_ - 1) / blockSize_; } uint64_t size() const { - return currentSize; + return currentSize_; } uint64_t capacity() const { - return currentCapacity; + return currentCapacity_; } void resize(uint64_t size); diff --git a/c++/src/BloomFilter.cc 
b/c++/src/BloomFilter.cc index 882c6f42527..b1987de217c 100644 --- a/c++/src/BloomFilter.cc +++ b/c++/src/BloomFilter.cc @@ -37,50 +37,50 @@ namespace orc { * Implementation of BitSet */ BitSet::BitSet(uint64_t numBits) { - mData.resize(static_cast(ceil(static_cast(numBits) / BITS_OF_LONG)), 0); + mData_.resize(static_cast(ceil(static_cast(numBits) / BITS_OF_LONG)), 0); } BitSet::BitSet(const uint64_t* bits, uint64_t numBits) { // caller should make sure numBits is multiple of 64 - mData.resize(numBits >> SHIFT_6_BITS, 0); - memcpy(mData.data(), bits, numBits >> SHIFT_3_BITS); + mData_.resize(numBits >> SHIFT_6_BITS, 0); + memcpy(mData_.data(), bits, numBits >> SHIFT_3_BITS); } void BitSet::set(uint64_t index) { - mData[index >> SHIFT_6_BITS] |= (1ULL << (index % BITS_OF_LONG)); + mData_[index >> SHIFT_6_BITS] |= (1ULL << (index % BITS_OF_LONG)); } bool BitSet::get(uint64_t index) { - return (mData[index >> SHIFT_6_BITS] & (1ULL << (index % BITS_OF_LONG))) != 0; + return (mData_[index >> SHIFT_6_BITS] & (1ULL << (index % BITS_OF_LONG))) != 0; } uint64_t BitSet::bitSize() { - return mData.size() << SHIFT_6_BITS; + return mData_.size() << SHIFT_6_BITS; } void BitSet::merge(const BitSet& other) { - if (mData.size() != other.mData.size()) { + if (mData_.size() != other.mData_.size()) { std::stringstream ss; - ss << "BitSet must be of equal length (" << mData.size() << " != " << other.mData.size() + ss << "BitSet must be of equal length (" << mData_.size() << " != " << other.mData_.size() << ")"; throw std::logic_error(ss.str()); } - for (size_t i = 0; i != mData.size(); i++) { - mData[i] |= other.mData[i]; + for (size_t i = 0; i != mData_.size(); i++) { + mData_[i] |= other.mData_[i]; } } void BitSet::clear() { - memset(mData.data(), 0, sizeof(uint64_t) * mData.size()); + memset(mData_.data(), 0, sizeof(uint64_t) * mData_.size()); } const uint64_t* BitSet::getData() const { - return mData.data(); + return mData_.data(); } bool BitSet::operator==(const BitSet& other) 
const { - return mData == other.mData; + return mData_ == other.mData_; } /** @@ -127,9 +127,9 @@ namespace orc { uint64_t nb = static_cast(optimalNumOfBits(expectedEntries, fpp)); // make 'mNumBits' multiple of 64 - mNumBits = nb + (BITS_OF_LONG - (nb % BITS_OF_LONG)); - mNumHashFunctions = optimalNumOfHashFunctions(expectedEntries, mNumBits); - mBitSet.reset(new BitSet(mNumBits)); + mNumBits_ = nb + (BITS_OF_LONG - (nb % BITS_OF_LONG)); + mNumHashFunctions_ = optimalNumOfHashFunctions(expectedEntries, mNumBits_); + mBitSet_.reset(new BitSet(mNumBits_)); } void BloomFilterImpl::addBytes(const char* data, int64_t length) { @@ -155,11 +155,11 @@ namespace orc { } uint64_t BloomFilterImpl::getBitSize() const { - return mBitSet->bitSize(); + return mBitSet_->bitSize(); } int32_t BloomFilterImpl::getNumHashFunctions() const { - return mNumHashFunctions; + return mNumHashFunctions_; } DIAGNOSTIC_PUSH @@ -175,17 +175,17 @@ namespace orc { // caller should make sure input proto::BloomFilter is valid since // no check will be performed in the following constructor BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) { - mNumHashFunctions = static_cast(bloomFilter.num_hash_functions()); + mNumHashFunctions_ = static_cast(bloomFilter.num_hash_functions()); const std::string& bitsetStr = bloomFilter.utf8bitset(); - mNumBits = bitsetStr.size() << SHIFT_3_BITS; - checkArgument(mNumBits % BITS_OF_LONG == 0, "numBits should be multiple of 64!"); + mNumBits_ = bitsetStr.size() << SHIFT_3_BITS; + checkArgument(mNumBits_ % BITS_OF_LONG == 0, "numBits should be multiple of 64!"); const uint64_t* bitset = reinterpret_cast(bitsetStr.data()); if (isLittleEndian()) { - mBitSet.reset(new BitSet(bitset, mNumBits)); + mBitSet_.reset(new BitSet(bitset, mNumBits_)); } else { - std::vector longs(mNumBits >> SHIFT_6_BITS); + std::vector longs(mNumBits_ >> SHIFT_6_BITS); for (size_t i = 0; i != longs.size(); ++i) { // convert little-endian to big-endian const uint64_t src = 
bitset[i]; @@ -195,7 +195,7 @@ namespace orc { } } - mBitSet.reset(new BitSet(longs.data(), mNumBits)); + mBitSet_.reset(new BitSet(longs.data(), mNumBits_)); } } @@ -215,14 +215,14 @@ namespace orc { // So we cast hash64 to uint64_t here for an unsigned right shift. int32_t hash2 = static_cast(static_cast(hash64) >> 32); - for (int32_t i = 1; i <= mNumHashFunctions; ++i) { + for (int32_t i = 1; i <= mNumHashFunctions_; ++i) { int32_t combinedHash = hash1 + i * hash2; // hashcode should be positive, flip all the bits if it's negative if (combinedHash < 0) { combinedHash = ~combinedHash; } - uint64_t pos = static_cast(combinedHash) % mNumBits; - mBitSet->set(pos); + uint64_t pos = static_cast(combinedHash) % mNumBits_; + mBitSet_->set(pos); } } @@ -232,14 +232,14 @@ namespace orc { // So we cast hash64 to uint64_t here for an unsigned right shift. int32_t hash2 = static_cast(static_cast(hash64) >> 32); - for (int32_t i = 1; i <= mNumHashFunctions; ++i) { + for (int32_t i = 1; i <= mNumHashFunctions_; ++i) { int32_t combinedHash = hash1 + i * hash2; // hashcode should be positive, flip all the bits if it's negative if (combinedHash < 0) { combinedHash = ~combinedHash; } - uint64_t pos = static_cast(combinedHash) % mNumBits; - if (!mBitSet->get(pos)) { + uint64_t pos = static_cast(combinedHash) % mNumBits_; + if (!mBitSet_->get(pos)) { return false; } } @@ -247,33 +247,33 @@ namespace orc { } void BloomFilterImpl::merge(const BloomFilterImpl& other) { - if (mNumBits != other.mNumBits || mNumHashFunctions != other.mNumHashFunctions) { + if (mNumBits_ != other.mNumBits_ || mNumHashFunctions_ != other.mNumHashFunctions_) { std::stringstream ss; ss << "BloomFilters are not compatible for merging: " - << "this: numBits:" << mNumBits << ",numHashFunctions:" << mNumHashFunctions - << ", that: numBits:" << other.mNumBits << ",numHashFunctions:" << other.mNumHashFunctions; + << "this: numBits:" << mNumBits_ << ",numHashFunctions:" << mNumHashFunctions_ + << ", that: numBits:" 
<< other.mNumBits_ << ",numHashFunctions:" << other.mNumHashFunctions_; throw std::logic_error(ss.str()); } - mBitSet->merge(*other.mBitSet); + mBitSet_->merge(*other.mBitSet_); } void BloomFilterImpl::reset() { - mBitSet->clear(); + mBitSet_->clear(); } void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const { - bloomFilter.set_num_hash_functions(static_cast(mNumHashFunctions)); + bloomFilter.set_num_hash_functions(static_cast(mNumHashFunctions_)); // According to ORC standard, the encoding is a sequence of bytes with // a little endian encoding in the utf8bitset field. if (isLittleEndian()) { // bytes are already organized in little endian; thus no conversion needed - const char* bitset = reinterpret_cast(mBitSet->getData()); + const char* bitset = reinterpret_cast(mBitSet_->getData()); bloomFilter.set_utf8bitset(bitset, sizeInBytes()); } else { std::vector bitset(sizeInBytes() / sizeof(uint64_t), 0); - const uint64_t* longs = mBitSet->getData(); + const uint64_t* longs = mBitSet_->getData(); for (size_t i = 0; i != bitset.size(); ++i) { uint64_t& dst = bitset[i]; const uint64_t src = longs[i]; @@ -287,8 +287,8 @@ namespace orc { } bool BloomFilterImpl::operator==(const BloomFilterImpl& other) const { - return mNumBits == other.mNumBits && mNumHashFunctions == other.mNumHashFunctions && - *mBitSet == *other.mBitSet; + return mNumBits_ == other.mNumBits_ && mNumHashFunctions_ == other.mNumHashFunctions_ && + *mBitSet_ == *other.mBitSet_; } BloomFilter::~BloomFilter() { diff --git a/c++/src/BloomFilter.hh b/c++/src/BloomFilter.hh index d72961a83c3..e8d78663b5e 100644 --- a/c++/src/BloomFilter.hh +++ b/c++/src/BloomFilter.hh @@ -90,7 +90,7 @@ namespace orc { bool operator==(const BitSet& other) const; private: - std::vector mData; + std::vector mData_; }; /** @@ -174,9 +174,9 @@ namespace orc { private: static constexpr double DEFAULT_FPP = 0.05; - uint64_t mNumBits; - int32_t mNumHashFunctions; - std::unique_ptr mBitSet; + uint64_t mNumBits_; + 
int32_t mNumHashFunctions_; + std::unique_ptr mBitSet_; }; struct BloomFilterUTF8Utils { diff --git a/c++/src/BpackingDefault.cc b/c++/src/BpackingDefault.cc index 5a80bc6fb1e..401a217d35f 100644 --- a/c++/src/BpackingDefault.cc +++ b/c++/src/BpackingDefault.cc @@ -22,7 +22,7 @@ namespace orc { - UnpackDefault::UnpackDefault(RleDecoderV2* dec) : decoder(dec) { + UnpackDefault::UnpackDefault(RleDecoderV2* dec) : decoder_(dec) { // PASS } @@ -34,17 +34,17 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Make sure bitsLeft is 0 before the loop. bitsLeft can only be 0, 4, or 8. - while (decoder->getBitsLeft() > 0 && curIdx < offset + len) { - decoder->setBitsLeft(decoder->getBitsLeft() - 4); - data[curIdx++] = (decoder->getCurByte() >> decoder->getBitsLeft()) & 15; + while (decoder_->getBitsLeft() > 0 && curIdx < offset + len) { + decoder_->setBitsLeft(decoder_->getBitsLeft() - 4); + data[curIdx++] = (decoder_->getCurByte() >> decoder_->getBitsLeft()) & 15; } if (curIdx == offset + len) return; // Exhaust the buffer uint64_t numGroups = (offset + len - curIdx) / 2; - numGroups = std::min(numGroups, static_cast(decoder->bufLength())); + numGroups = std::min(numGroups, static_cast(decoder_->bufLength())); // Avoid updating 'bufferStart' inside the loop. 
- auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); uint32_t localByte; for (uint64_t i = 0; i < numGroups; ++i) { localByte = *buffer++; @@ -52,12 +52,12 @@ namespace orc { data[curIdx + 1] = localByte & 15; curIdx += 2; } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // readByte() will update 'bufferStart' and 'bufferEnd' - decoder->setCurByte(decoder->readByte()); - decoder->setBitsLeft(8); + decoder_->setCurByte(decoder_->readByte()); + decoder_->setBitsLeft(8); } } @@ -65,18 +65,18 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength(); + int64_t bufferNum = decoder_->bufLength(); bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); // Avoid updating 'bufferStart' inside the loop. - auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { data[curIdx++] = *buffer++; } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // readByte() will update 'bufferStart' and 'bufferEnd'. - data[curIdx++] = decoder->readByte(); + data[curIdx++] = decoder_->readByte(); } } @@ -84,23 +84,23 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 2; + int64_t bufferNum = decoder_->bufLength() / 2; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint16_t b0, b1; // Avoid updating 'bufferStart' inside the loop. 
- auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); buffer += 2; data[curIdx++] = (b0 << 8) | b1; } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. - b0 = decoder->readByte(); - b1 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); data[curIdx++] = (b0 << 8) | b1; } } @@ -109,11 +109,11 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 3; + int64_t bufferNum = decoder_->bufLength() / 3; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint32_t b0, b1, b2; // Avoid updating 'bufferStart' inside the loop. - auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); @@ -122,13 +122,13 @@ namespace orc { data[curIdx++] = static_cast((b0 << 16) | (b1 << 8) | b2); } //////decoder->bufferStart += bufferNum * 3; - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. 
- b0 = decoder->readByte(); - b1 = decoder->readByte(); - b2 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); + b2 = decoder_->readByte(); data[curIdx++] = static_cast((b0 << 16) | (b1 << 8) | b2); } } @@ -137,11 +137,11 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 4; + int64_t bufferNum = decoder_->bufLength() / 4; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint32_t b0, b1, b2, b3; // Avoid updating 'bufferStart' inside the loop. - auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); @@ -150,14 +150,14 @@ namespace orc { buffer += 4; data[curIdx++] = static_cast((b0 << 24) | (b1 << 16) | (b2 << 8) | b3); } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. - b0 = decoder->readByte(); - b1 = decoder->readByte(); - b2 = decoder->readByte(); - b3 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); + b2 = decoder_->readByte(); + b3 = decoder_->readByte(); data[curIdx++] = static_cast((b0 << 24) | (b1 << 16) | (b2 << 8) | b3); } } @@ -166,11 +166,11 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 5; + int64_t bufferNum = decoder_->bufLength() / 5; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint64_t b0, b1, b2, b3, b4; // Avoid updating 'bufferStart' inside the loop. 
- auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); @@ -181,15 +181,15 @@ namespace orc { data[curIdx++] = static_cast((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4); } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. - b0 = decoder->readByte(); - b1 = decoder->readByte(); - b2 = decoder->readByte(); - b3 = decoder->readByte(); - b4 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); + b2 = decoder_->readByte(); + b3 = decoder_->readByte(); + b4 = decoder_->readByte(); data[curIdx++] = static_cast((b0 << 32) | (b1 << 24) | (b2 << 16) | (b3 << 8) | b4); } } @@ -198,11 +198,11 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 6; + int64_t bufferNum = decoder_->bufLength() / 6; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint64_t b0, b1, b2, b3, b4, b5; // Avoid updating 'bufferStart' inside the loop. - auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); @@ -214,16 +214,16 @@ namespace orc { data[curIdx++] = static_cast((b0 << 40) | (b1 << 32) | (b2 << 24) | (b3 << 16) | (b4 << 8) | b5); } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. 
- b0 = decoder->readByte(); - b1 = decoder->readByte(); - b2 = decoder->readByte(); - b3 = decoder->readByte(); - b4 = decoder->readByte(); - b5 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); + b2 = decoder_->readByte(); + b3 = decoder_->readByte(); + b4 = decoder_->readByte(); + b5 = decoder_->readByte(); data[curIdx++] = static_cast((b0 << 40) | (b1 << 32) | (b2 << 24) | (b3 << 16) | (b4 << 8) | b5); } @@ -233,11 +233,11 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 7; + int64_t bufferNum = decoder_->bufLength() / 7; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint64_t b0, b1, b2, b3, b4, b5, b6; // Avoid updating 'bufferStart' inside the loop. - auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); @@ -250,17 +250,17 @@ namespace orc { data[curIdx++] = static_cast((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) | (b4 << 16) | (b5 << 8) | b6); } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. 
- b0 = decoder->readByte(); - b1 = decoder->readByte(); - b2 = decoder->readByte(); - b3 = decoder->readByte(); - b4 = decoder->readByte(); - b5 = decoder->readByte(); - b6 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); + b2 = decoder_->readByte(); + b3 = decoder_->readByte(); + b4 = decoder_->readByte(); + b5 = decoder_->readByte(); + b6 = decoder_->readByte(); data[curIdx++] = static_cast((b0 << 48) | (b1 << 40) | (b2 << 32) | (b3 << 24) | (b4 << 16) | (b5 << 8) | b6); } @@ -270,11 +270,11 @@ namespace orc { uint64_t curIdx = offset; while (curIdx < offset + len) { // Exhaust the buffer - int64_t bufferNum = decoder->bufLength() / 8; + int64_t bufferNum = decoder_->bufLength() / 8; bufferNum = std::min(bufferNum, static_cast(offset + len - curIdx)); uint64_t b0, b1, b2, b3, b4, b5, b6, b7; // Avoid updating 'bufferStart' inside the loop. - auto* buffer = reinterpret_cast(decoder->getBufStart()); + auto* buffer = reinterpret_cast(decoder_->getBufStart()); for (int i = 0; i < bufferNum; ++i) { b0 = static_cast(*buffer); b1 = static_cast(*(buffer + 1)); @@ -288,18 +288,18 @@ namespace orc { data[curIdx++] = static_cast((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) | (b4 << 24) | (b5 << 16) | (b6 << 8) | b7); } - decoder->setBufStart(reinterpret_cast(buffer)); + decoder_->setBufStart(reinterpret_cast(buffer)); if (curIdx == offset + len) return; // One of the following readByte() will update 'bufferStart' and 'bufferEnd'. 
- b0 = decoder->readByte(); - b1 = decoder->readByte(); - b2 = decoder->readByte(); - b3 = decoder->readByte(); - b4 = decoder->readByte(); - b5 = decoder->readByte(); - b6 = decoder->readByte(); - b7 = decoder->readByte(); + b0 = decoder_->readByte(); + b1 = decoder_->readByte(); + b2 = decoder_->readByte(); + b3 = decoder_->readByte(); + b4 = decoder_->readByte(); + b5 = decoder_->readByte(); + b6 = decoder_->readByte(); + b7 = decoder_->readByte(); data[curIdx++] = static_cast((b0 << 56) | (b1 << 48) | (b2 << 40) | (b3 << 32) | (b4 << 24) | (b5 << 16) | (b6 << 8) | b7); } @@ -309,19 +309,19 @@ namespace orc { for (uint64_t i = offset; i < (offset + len); i++) { uint64_t result = 0; uint64_t bitsLeftToRead = fbs; - while (bitsLeftToRead > decoder->getBitsLeft()) { - result <<= decoder->getBitsLeft(); - result |= decoder->getCurByte() & ((1 << decoder->getBitsLeft()) - 1); - bitsLeftToRead -= decoder->getBitsLeft(); - decoder->setCurByte(decoder->readByte()); - decoder->setBitsLeft(8); + while (bitsLeftToRead > decoder_->getBitsLeft()) { + result <<= decoder_->getBitsLeft(); + result |= decoder_->getCurByte() & ((1 << decoder_->getBitsLeft()) - 1); + bitsLeftToRead -= decoder_->getBitsLeft(); + decoder_->setCurByte(decoder_->readByte()); + decoder_->setBitsLeft(8); } // handle the left over bits if (bitsLeftToRead > 0) { result <<= bitsLeftToRead; - decoder->setBitsLeft(decoder->getBitsLeft() - static_cast(bitsLeftToRead)); - result |= (decoder->getCurByte() >> decoder->getBitsLeft()) & ((1 << bitsLeftToRead) - 1); + decoder_->setBitsLeft(decoder_->getBitsLeft() - static_cast(bitsLeftToRead)); + result |= (decoder_->getCurByte() >> decoder_->getBitsLeft()) & ((1 << bitsLeftToRead) - 1); } data[i] = static_cast(result); } diff --git a/c++/src/BpackingDefault.hh b/c++/src/BpackingDefault.hh index 0a582344958..bbd7851260f 100644 --- a/c++/src/BpackingDefault.hh +++ b/c++/src/BpackingDefault.hh @@ -45,7 +45,7 @@ namespace orc { void plainUnpackLongs(int64_t* data, 
uint64_t offset, uint64_t len, uint64_t fbs); private: - RleDecoderV2* decoder; + RleDecoderV2* decoder_; }; class BitUnpackDefault : public BitUnpack { diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc index b81d282e353..bdbaad1da6d 100644 --- a/c++/src/ByteRLE.cc +++ b/c++/src/ByteRLE.cc @@ -244,14 +244,14 @@ namespace orc { virtual void suppress() override; private: - int bitsRemained; - char current; + int bitsRemained_; + char current_; }; BooleanRleEncoderImpl::BooleanRleEncoderImpl(std::unique_ptr output) : ByteRleEncoderImpl(std::move(output)) { - bitsRemained = 8; - current = static_cast(0); + bitsRemained_ = 8; + current_ = static_cast(0); } BooleanRleEncoderImpl::~BooleanRleEncoderImpl() { @@ -260,43 +260,43 @@ namespace orc { void BooleanRleEncoderImpl::add(const char* data, uint64_t numValues, const char* notNull) { for (uint64_t i = 0; i < numValues; ++i) { - if (bitsRemained == 0) { - write(current); - current = static_cast(0); - bitsRemained = 8; + if (bitsRemained_ == 0) { + write(current_); + current_ = static_cast(0); + bitsRemained_ = 8; } if (!notNull || notNull[i]) { if (!data || data[i]) { - current = static_cast(current | (0x80 >> (8 - bitsRemained))); + current_ = static_cast(current_ | (0x80 >> (8 - bitsRemained_))); } - --bitsRemained; + --bitsRemained_; } } - if (bitsRemained == 0) { - write(current); - current = static_cast(0); - bitsRemained = 8; + if (bitsRemained_ == 0) { + write(current_); + current_ = static_cast(0); + bitsRemained_ = 8; } } uint64_t BooleanRleEncoderImpl::flush() { - if (bitsRemained != 8) { - write(current); + if (bitsRemained_ != 8) { + write(current_); } - bitsRemained = 8; - current = static_cast(0); + bitsRemained_ = 8; + current_ = static_cast(0); return ByteRleEncoderImpl::flush(); } void BooleanRleEncoderImpl::recordPosition(PositionRecorder* recorder) const { ByteRleEncoderImpl::recordPosition(recorder); - recorder->add(static_cast(8 - bitsRemained)); + recorder->add(static_cast(8 - bitsRemained_)); } 
void BooleanRleEncoderImpl::suppress() { ByteRleEncoderImpl::suppress(); - bitsRemained = 8; - current = static_cast(0); + bitsRemained_ = 8; + current_ = static_cast(0); } std::unique_ptr createBooleanRleEncoder( @@ -386,8 +386,8 @@ namespace orc { } ByteRleDecoderImpl::ByteRleDecoderImpl(std::unique_ptr input, - ReaderMetrics* _metrics) - : metrics(_metrics) { + ReaderMetrics* metrics) + : metrics(metrics) { inputStream = std::move(input); reset(); } @@ -526,8 +526,8 @@ namespace orc { }; BooleanRleDecoderImpl::BooleanRleDecoderImpl(std::unique_ptr input, - ReaderMetrics* _metrics) - : ByteRleDecoderImpl(std::move(input), _metrics) { + ReaderMetrics* metrics) + : ByteRleDecoderImpl(std::move(input), metrics) { remainingBits = 0; lastByte = 0; } diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc index 5297f80371b..bd6c4535d8d 100644 --- a/c++/src/ColumnPrinter.cc +++ b/c++/src/ColumnPrinter.cc @@ -43,7 +43,7 @@ namespace orc { class BooleanColumnPrinter : public ColumnPrinter { private: - const int64_t* data; + const int64_t* data_; public: BooleanColumnPrinter(std::string&); @@ -54,7 +54,7 @@ namespace orc { class LongColumnPrinter : public ColumnPrinter { private: - const int64_t* data; + const int64_t* data_; public: LongColumnPrinter(std::string&); @@ -65,8 +65,8 @@ namespace orc { class DoubleColumnPrinter : public ColumnPrinter { private: - const double* data; - const bool isFloat; + const double* data_; + const bool isFloat_; public: DoubleColumnPrinter(std::string&, const Type& type); @@ -77,8 +77,8 @@ namespace orc { class TimestampColumnPrinter : public ColumnPrinter { private: - const int64_t* seconds; - const int64_t* nanoseconds; + const int64_t* seconds_; + const int64_t* nanoseconds_; public: TimestampColumnPrinter(std::string&); @@ -89,7 +89,7 @@ namespace orc { class DateColumnPrinter : public ColumnPrinter { private: - const int64_t* data; + const int64_t* data_; public: DateColumnPrinter(std::string&); @@ -100,8 +100,8 @@ namespace 
orc { class Decimal64ColumnPrinter : public ColumnPrinter { private: - const int64_t* data; - int32_t scale; + const int64_t* data_; + int32_t scale_; public: Decimal64ColumnPrinter(std::string&); @@ -112,8 +112,8 @@ namespace orc { class Decimal128ColumnPrinter : public ColumnPrinter { private: - const Int128* data; - int32_t scale; + const Int128* data_; + int32_t scale_; public: Decimal128ColumnPrinter(std::string&); @@ -124,8 +124,8 @@ namespace orc { class StringColumnPrinter : public ColumnPrinter { private: - const char* const* start; - const int64_t* length; + const char* const* start_; + const int64_t* length_; public: StringColumnPrinter(std::string&); @@ -136,8 +136,8 @@ namespace orc { class BinaryColumnPrinter : public ColumnPrinter { private: - const char* const* start; - const int64_t* length; + const char* const* start_; + const int64_t* length_; public: BinaryColumnPrinter(std::string&); @@ -148,8 +148,8 @@ namespace orc { class ListColumnPrinter : public ColumnPrinter { private: - const int64_t* offsets; - std::unique_ptr elementPrinter; + const int64_t* offsets_; + std::unique_ptr elementPrinter_; public: ListColumnPrinter(std::string&, const Type& type); @@ -160,9 +160,9 @@ namespace orc { class MapColumnPrinter : public ColumnPrinter { private: - const int64_t* offsets; - std::unique_ptr keyPrinter; - std::unique_ptr elementPrinter; + const int64_t* offsets_; + std::unique_ptr keyPrinter_; + std::unique_ptr elementPrinter_; public: MapColumnPrinter(std::string&, const Type& type); @@ -173,9 +173,9 @@ namespace orc { class UnionColumnPrinter : public ColumnPrinter { private: - const unsigned char* tags; - const uint64_t* offsets; - std::vector> fieldPrinter; + const unsigned char* tags_; + const uint64_t* offsets_; + std::vector> fieldPrinter_; public: UnionColumnPrinter(std::string&, const Type& type); @@ -185,8 +185,8 @@ namespace orc { class StructColumnPrinter : public ColumnPrinter { private: - std::vector> fieldPrinter; - std::vector 
fieldNames; + std::vector> fieldPrinter_; + std::vector fieldNames_; public: StructColumnPrinter(std::string&, const Type& type); @@ -203,7 +203,7 @@ namespace orc { file.append(ptr, len); } - ColumnPrinter::ColumnPrinter(std::string& _buffer) : buffer(_buffer) { + ColumnPrinter::ColumnPrinter(std::string& bf) : buffer(bf) { notNull = nullptr; hasNulls = false; } @@ -293,7 +293,7 @@ namespace orc { return result; } - VoidColumnPrinter::VoidColumnPrinter(std::string& _buffer) : ColumnPrinter(_buffer) { + VoidColumnPrinter::VoidColumnPrinter(std::string& buffer) : ColumnPrinter(buffer) { // PASS } @@ -305,33 +305,33 @@ namespace orc { writeString(buffer, "null"); } - LongColumnPrinter::LongColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), data(nullptr) { + LongColumnPrinter::LongColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), data_(nullptr) { // PASS } void LongColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - data = dynamic_cast(batch).data.data(); + data_ = dynamic_cast(batch).data.data(); } void LongColumnPrinter::printRow(uint64_t rowId) { if (hasNulls && !notNull[rowId]) { writeString(buffer, "null"); } else { - const auto numBuffer = std::to_string(static_cast(data[rowId])); + const auto numBuffer = std::to_string(static_cast(data_[rowId])); writeString(buffer, numBuffer.c_str()); } } - DoubleColumnPrinter::DoubleColumnPrinter(std::string& _buffer, const Type& type) - : ColumnPrinter(_buffer), data(nullptr), isFloat(type.getKind() == FLOAT) { + DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer, const Type& type) + : ColumnPrinter(buffer), data_(nullptr), isFloat_(type.getKind() == FLOAT) { // PASS } void DoubleColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - data = dynamic_cast(batch).data.data(); + data_ = dynamic_cast(batch).data.data(); } void DoubleColumnPrinter::printRow(uint64_t rowId) { @@ -339,20 +339,20 @@ namespace orc { writeString(buffer, 
"null"); } else { char numBuffer[64]; - snprintf(numBuffer, sizeof(numBuffer), isFloat ? "%.7g" : "%.14g", data[rowId]); + snprintf(numBuffer, sizeof(numBuffer), isFloat_ ? "%.7g" : "%.14g", data_[rowId]); writeString(buffer, numBuffer); } } - Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), data(nullptr), scale(0) { + Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), data_(nullptr), scale_(0) { // PASS } void Decimal64ColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - data = dynamic_cast(batch).values.data(); - scale = dynamic_cast(batch).scale; + data_ = dynamic_cast(batch).values.data(); + scale_ = dynamic_cast(batch).scale; } std::string toDecimalString(int64_t value, int32_t scale) { @@ -387,38 +387,38 @@ namespace orc { if (hasNulls && !notNull[rowId]) { writeString(buffer, "null"); } else { - writeString(buffer, toDecimalString(data[rowId], scale).c_str()); + writeString(buffer, toDecimalString(data_[rowId], scale_).c_str()); } } - Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), data(nullptr), scale(0) { + Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), data_(nullptr), scale_(0) { // PASS } void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - data = dynamic_cast(batch).values.data(); - scale = dynamic_cast(batch).scale; + data_ = dynamic_cast(batch).values.data(); + scale_ = dynamic_cast(batch).scale; } void Decimal128ColumnPrinter::printRow(uint64_t rowId) { if (hasNulls && !notNull[rowId]) { writeString(buffer, "null"); } else { - writeString(buffer, data[rowId].toDecimalString(scale).c_str()); + writeString(buffer, data_[rowId].toDecimalString(scale_).c_str()); } } - StringColumnPrinter::StringColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), start(nullptr), 
length(nullptr) { + StringColumnPrinter::StringColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), start_(nullptr), length_(nullptr) { // PASS } void StringColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - start = dynamic_cast(batch).data.data(); - length = dynamic_cast(batch).length.data(); + start_ = dynamic_cast(batch).data.data(); + length_ = dynamic_cast(batch).length.data(); } void StringColumnPrinter::printRow(uint64_t rowId) { @@ -426,8 +426,8 @@ namespace orc { writeString(buffer, "null"); } else { writeChar(buffer, '"'); - for (int64_t i = 0; i < length[rowId]; ++i) { - char ch = static_cast(start[rowId][i]); + for (int64_t i = 0; i < length_[rowId]; ++i) { + char ch = static_cast(start_[rowId][i]); switch (ch) { case '\\': writeString(buffer, "\\\\"); @@ -459,15 +459,15 @@ namespace orc { } } - ListColumnPrinter::ListColumnPrinter(std::string& _buffer, const Type& type) - : ColumnPrinter(_buffer), offsets(nullptr) { - elementPrinter = createColumnPrinter(buffer, type.getSubtype(0)); + ListColumnPrinter::ListColumnPrinter(std::string& buffer, const Type& type) + : ColumnPrinter(buffer), offsets_(nullptr) { + elementPrinter_ = createColumnPrinter(buffer, type.getSubtype(0)); } void ListColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - offsets = dynamic_cast(batch).offsets.data(); - elementPrinter->reset(*dynamic_cast(batch).elements); + offsets_ = dynamic_cast(batch).offsets.data(); + elementPrinter_->reset(*dynamic_cast(batch).elements); } void ListColumnPrinter::printRow(uint64_t rowId) { @@ -475,28 +475,28 @@ namespace orc { writeString(buffer, "null"); } else { writeChar(buffer, '['); - for (int64_t i = offsets[rowId]; i < offsets[rowId + 1]; ++i) { - if (i != offsets[rowId]) { + for (int64_t i = offsets_[rowId]; i < offsets_[rowId + 1]; ++i) { + if (i != offsets_[rowId]) { writeString(buffer, ", "); } - elementPrinter->printRow(static_cast(i)); + 
elementPrinter_->printRow(static_cast(i)); } writeChar(buffer, ']'); } } - MapColumnPrinter::MapColumnPrinter(std::string& _buffer, const Type& type) - : ColumnPrinter(_buffer), offsets(nullptr) { - keyPrinter = createColumnPrinter(buffer, type.getSubtype(0)); - elementPrinter = createColumnPrinter(buffer, type.getSubtype(1)); + MapColumnPrinter::MapColumnPrinter(std::string& buffer, const Type& type) + : ColumnPrinter(buffer), offsets_(nullptr) { + keyPrinter_ = createColumnPrinter(buffer, type.getSubtype(0)); + elementPrinter_ = createColumnPrinter(buffer, type.getSubtype(1)); } void MapColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); const MapVectorBatch& myBatch = dynamic_cast(batch); - offsets = myBatch.offsets.data(); - keyPrinter->reset(*myBatch.keys); - elementPrinter->reset(*myBatch.elements); + offsets_ = myBatch.offsets.data(); + keyPrinter_->reset(*myBatch.keys); + elementPrinter_->reset(*myBatch.elements); } void MapColumnPrinter::printRow(uint64_t rowId) { @@ -504,34 +504,34 @@ namespace orc { writeString(buffer, "null"); } else { writeChar(buffer, '['); - for (int64_t i = offsets[rowId]; i < offsets[rowId + 1]; ++i) { - if (i != offsets[rowId]) { + for (int64_t i = offsets_[rowId]; i < offsets_[rowId + 1]; ++i) { + if (i != offsets_[rowId]) { writeString(buffer, ", "); } writeString(buffer, "{\"key\": "); - keyPrinter->printRow(static_cast(i)); + keyPrinter_->printRow(static_cast(i)); writeString(buffer, ", \"value\": "); - elementPrinter->printRow(static_cast(i)); + elementPrinter_->printRow(static_cast(i)); writeChar(buffer, '}'); } writeChar(buffer, ']'); } } - UnionColumnPrinter::UnionColumnPrinter(std::string& _buffer, const Type& type) - : ColumnPrinter(_buffer), tags(nullptr), offsets(nullptr) { + UnionColumnPrinter::UnionColumnPrinter(std::string& buffer, const Type& type) + : ColumnPrinter(buffer), tags_(nullptr), offsets_(nullptr) { for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) { - 
fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))); + fieldPrinter_.push_back(createColumnPrinter(buffer, type.getSubtype(i))); } } void UnionColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); const UnionVectorBatch& unionBatch = dynamic_cast(batch); - tags = unionBatch.tags.data(); - offsets = unionBatch.offsets.data(); - for (size_t i = 0; i < fieldPrinter.size(); ++i) { - fieldPrinter[i]->reset(*(unionBatch.children[i])); + tags_ = unionBatch.tags.data(); + offsets_ = unionBatch.offsets.data(); + for (size_t i = 0; i < fieldPrinter_.size(); ++i) { + fieldPrinter_[i]->reset(*(unionBatch.children[i])); } } @@ -540,27 +540,27 @@ namespace orc { writeString(buffer, "null"); } else { writeString(buffer, "{\"tag\": "); - const auto numBuffer = std::to_string(static_cast(tags[rowId])); + const auto numBuffer = std::to_string(static_cast(tags_[rowId])); writeString(buffer, numBuffer.c_str()); writeString(buffer, ", \"value\": "); - fieldPrinter[tags[rowId]]->printRow(offsets[rowId]); + fieldPrinter_[tags_[rowId]]->printRow(offsets_[rowId]); writeChar(buffer, '}'); } } - StructColumnPrinter::StructColumnPrinter(std::string& _buffer, const Type& type) - : ColumnPrinter(_buffer) { + StructColumnPrinter::StructColumnPrinter(std::string& buffer, const Type& type) + : ColumnPrinter(buffer) { for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) { - fieldNames.push_back(type.getFieldName(i)); - fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))); + fieldNames_.push_back(type.getFieldName(i)); + fieldPrinter_.push_back(createColumnPrinter(buffer, type.getSubtype(i))); } } void StructColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); const StructVectorBatch& structBatch = dynamic_cast(batch); - for (size_t i = 0; i < fieldPrinter.size(); ++i) { - fieldPrinter[i]->reset(*(structBatch.fields[i])); + for (size_t i = 0; i < fieldPrinter_.size(); ++i) { + 
fieldPrinter_[i]->reset(*(structBatch.fields[i])); } } @@ -569,21 +569,21 @@ namespace orc { writeString(buffer, "null"); } else { writeChar(buffer, '{'); - for (unsigned int i = 0; i < fieldPrinter.size(); ++i) { + for (unsigned int i = 0; i < fieldPrinter_.size(); ++i) { if (i != 0) { writeString(buffer, ", "); } writeChar(buffer, '"'); - writeString(buffer, fieldNames[i].c_str()); + writeString(buffer, fieldNames_[i].c_str()); writeString(buffer, "\": "); - fieldPrinter[i]->printRow(rowId); + fieldPrinter_[i]->printRow(rowId); } writeChar(buffer, '}'); } } - DateColumnPrinter::DateColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), data(nullptr) { + DateColumnPrinter::DateColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), data_(nullptr) { // PASS } @@ -591,7 +591,7 @@ namespace orc { if (hasNulls && !notNull[rowId]) { writeString(buffer, "null"); } else { - const time_t timeValue = data[rowId] * 24 * 60 * 60; + const time_t timeValue = data_[rowId] * 24 * 60 * 60; struct tm tmValue; gmtime_r(&timeValue, &tmValue); char timeBuffer[11]; @@ -604,11 +604,11 @@ namespace orc { void DateColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - data = dynamic_cast(batch).data.data(); + data_ = dynamic_cast(batch).data.data(); } - BooleanColumnPrinter::BooleanColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), data(nullptr) { + BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), data_(nullptr) { // PASS } @@ -616,17 +616,17 @@ namespace orc { if (hasNulls && !notNull[rowId]) { writeString(buffer, "null"); } else { - writeString(buffer, (data[rowId] ? "true" : "false")); + writeString(buffer, (data_[rowId] ? 
"true" : "false")); } } void BooleanColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - data = dynamic_cast(batch).data.data(); + data_ = dynamic_cast(batch).data.data(); } - BinaryColumnPrinter::BinaryColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), start(nullptr), length(nullptr) { + BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), start_(nullptr), length_(nullptr) { // PASS } @@ -635,11 +635,11 @@ namespace orc { writeString(buffer, "null"); } else { writeChar(buffer, '['); - for (int64_t i = 0; i < length[rowId]; ++i) { + for (int64_t i = 0; i < length_[rowId]; ++i) { if (i != 0) { writeString(buffer, ", "); } - const auto numBuffer = std::to_string(static_cast(start[rowId][i]) & 0xff); + const auto numBuffer = std::to_string(static_cast(start_[rowId][i]) & 0xff); writeString(buffer, numBuffer.c_str()); } writeChar(buffer, ']'); @@ -648,12 +648,12 @@ namespace orc { void BinaryColumnPrinter::reset(const ColumnVectorBatch& batch) { ColumnPrinter::reset(batch); - start = dynamic_cast(batch).data.data(); - length = dynamic_cast(batch).length.data(); + start_ = dynamic_cast(batch).data.data(); + length_ = dynamic_cast(batch).length.data(); } - TimestampColumnPrinter::TimestampColumnPrinter(std::string& _buffer) - : ColumnPrinter(_buffer), seconds(nullptr), nanoseconds(nullptr) { + TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer) + : ColumnPrinter(buffer), seconds_(nullptr), nanoseconds_(nullptr) { // PASS } @@ -662,8 +662,8 @@ namespace orc { if (hasNulls && !notNull[rowId]) { writeString(buffer, "null"); } else { - int64_t nanos = nanoseconds[rowId]; - time_t secs = static_cast(seconds[rowId]); + int64_t nanos = nanoseconds_[rowId]; + time_t secs = static_cast(seconds_[rowId]); struct tm tmValue; gmtime_r(&secs, &tmValue); char timeBuffer[20]; @@ -694,7 +694,7 @@ namespace orc { void TimestampColumnPrinter::reset(const ColumnVectorBatch& batch) { 
ColumnPrinter::reset(batch); const TimestampVectorBatch& ts = dynamic_cast(batch); - seconds = ts.data.data(); - nanoseconds = ts.nanoseconds.data(); + seconds_ = ts.data.data(); + nanoseconds_ = ts.nanoseconds.data(); } } // namespace orc diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index e9cd882606d..83129062f56 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -138,7 +138,7 @@ namespace orc { template class BooleanColumnReader : public ColumnReader { private: - std::unique_ptr rle; + std::unique_ptr rle_; public: BooleanColumnReader(const Type& type, StripeStreams& stipe); @@ -157,7 +157,7 @@ namespace orc { std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); if (stream == nullptr) throw ParseError("DATA stream not found in Boolean column"); - rle = createBooleanRleDecoder(std::move(stream), metrics); + rle_ = createBooleanRleDecoder(std::move(stream), metrics); } template @@ -168,7 +168,7 @@ namespace orc { template uint64_t BooleanColumnReader::skip(uint64_t numValues) { numValues = ColumnReader::skip(numValues); - rle->skip(numValues); + rle_->skip(numValues); return numValues; } @@ -180,7 +180,7 @@ namespace orc { // LongVectorBatch with long*. We cheat here in that case and use the long* // and then expand it in a second pass.. auto* ptr = dynamic_cast(rowBatch).data.data(); - rle->next(reinterpret_cast(ptr), numValues, + rle_->next(reinterpret_cast(ptr), numValues, rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr); expandBytesToIntegers(ptr, numValues); } @@ -189,27 +189,27 @@ namespace orc { void BooleanColumnReader::seekToRowGroup( std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); + rle_->seek(positions.at(columnId)); } template class ByteColumnReader : public ColumnReader { private: - std::unique_ptr rle; + std::unique_ptr rle_; public: ByteColumnReader(const Type& type, StripeStreams& stripe) : ColumnReader(type, stripe) { std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); if (stream == nullptr) throw ParseError("DATA stream not found in Byte column"); - rle = createByteRleDecoder(std::move(stream), metrics); + rle_ = createByteRleDecoder(std::move(stream), metrics); } ~ByteColumnReader() override = default; uint64_t skip(uint64_t numValues) override { numValues = ColumnReader::skip(numValues); - rle->skip(numValues); + rle_->skip(numValues); return numValues; } @@ -218,14 +218,14 @@ namespace orc { // Since the byte rle places the output in a char* instead of long*, // we cheat here and use the long* and then expand it in a second pass. auto* ptr = dynamic_cast(rowBatch).data.data(); - rle->next(reinterpret_cast(ptr), numValues, + rle_->next(reinterpret_cast(ptr), numValues, rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr); expandBytesToIntegers(ptr, numValues); } void seekToRowGroup(std::unordered_map& positions) override { ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); + rle_->seek(positions.at(columnId)); } }; @@ -267,12 +267,12 @@ namespace orc { class TimestampColumnReader : public ColumnReader { private: - std::unique_ptr secondsRle; - std::unique_ptr nanoRle; - const Timezone* writerTimezone; - const Timezone* readerTimezone; - const int64_t epochOffset; - const bool sameTimezone; + std::unique_ptr secondsRle_; + std::unique_ptr nanoRle_; + const Timezone* writerTimezone_; + const Timezone* readerTimezone_; + const int64_t epochOffset_; + const bool sameTimezone_; public: TimestampColumnReader(const Type& type, StripeStreams& stripe, bool isInstantType); @@ -288,18 +288,18 @@ namespace orc { TimestampColumnReader::TimestampColumnReader(const Type& type, StripeStreams& stripe, bool isInstantType) : ColumnReader(type, stripe), - writerTimezone(isInstantType ? &getTimezoneByName("GMT") : &stripe.getWriterTimezone()), - readerTimezone(isInstantType ? &getTimezoneByName("GMT") : &stripe.getReaderTimezone()), - epochOffset(writerTimezone->getEpoch()), - sameTimezone(writerTimezone == readerTimezone) { + writerTimezone_(isInstantType ? &getTimezoneByName("GMT") : &stripe.getWriterTimezone()), + readerTimezone_(isInstantType ? 
&getTimezoneByName("GMT") : &stripe.getReaderTimezone()), + epochOffset_(writerTimezone_->getEpoch()), + sameTimezone_(writerTimezone_ == readerTimezone_) { RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); if (stream == nullptr) throw ParseError("DATA stream not found in Timestamp column"); - secondsRle = createRleDecoder(std::move(stream), true, vers, memoryPool, metrics); + secondsRle_ = createRleDecoder(std::move(stream), true, vers, memoryPool, metrics); stream = stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true); if (stream == nullptr) throw ParseError("SECONDARY stream not found in Timestamp column"); - nanoRle = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics); + nanoRle_ = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics); } TimestampColumnReader::~TimestampColumnReader() { @@ -308,8 +308,8 @@ namespace orc { uint64_t TimestampColumnReader::skip(uint64_t numValues) { numValues = ColumnReader::skip(numValues); - secondsRle->skip(numValues); - nanoRle->skip(numValues); + secondsRle_->skip(numValues); + nanoRle_->skip(numValues); return numValues; } @@ -318,9 +318,9 @@ namespace orc { notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; TimestampVectorBatch& timestampBatch = dynamic_cast(rowBatch); int64_t* secsBuffer = timestampBatch.data.data(); - secondsRle->next(secsBuffer, numValues, notNull); + secondsRle_->next(secsBuffer, numValues, notNull); int64_t* nanoBuffer = timestampBatch.nanoseconds.data(); - nanoRle->next(nanoBuffer, numValues, notNull); + nanoRle_->next(nanoBuffer, numValues, notNull); // Construct the values for (uint64_t i = 0; i < numValues; i++) { @@ -332,17 +332,17 @@ namespace orc { nanoBuffer[i] *= 10; } } - int64_t writerTime = secsBuffer[i] + epochOffset; - if (!sameTimezone) { + int64_t writerTime = secsBuffer[i] + epochOffset_; + if (!sameTimezone_) { // adjust timestamp value to same wall clock time if writer and reader // time zones have different rules, which is required for Apache Orc. - const auto& wv = writerTimezone->getVariant(writerTime); - const auto& rv = readerTimezone->getVariant(writerTime); + const auto& wv = writerTimezone_->getVariant(writerTime); + const auto& rv = readerTimezone_->getVariant(writerTime); if (!wv.hasSameTzRule(rv)) { // If the timezone adjustment moves the millis across a DST boundary, // we need to reevaluate the offsets. 
int64_t adjustedTime = writerTime + wv.gmtOffset - rv.gmtOffset; - const auto& adjustedReader = readerTimezone->getVariant(adjustedTime); + const auto& adjustedReader = readerTimezone_->getVariant(adjustedTime); writerTime = writerTime + wv.gmtOffset - adjustedReader.gmtOffset; } } @@ -357,8 +357,8 @@ namespace orc { void TimestampColumnReader::seekToRowGroup( std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - secondsRle->seek(positions.at(columnId)); - nanoRle->seek(positions.at(columnId)); + secondsRle_->seek(positions.at(columnId)); + nanoRle_->seek(positions.at(columnId)); } template @@ -374,39 +374,39 @@ namespace orc { void seekToRowGroup(std::unordered_map& positions) override; private: - std::unique_ptr inputStream; - const uint64_t bytesPerValue = (columnKind == FLOAT) ? 4 : 8; - const char* bufferPointer; - const char* bufferEnd; + std::unique_ptr inputStream_; + const uint64_t bytesPerValue_ = (columnKind == FLOAT) ? 4 : 8; + const char* bufferPointer_; + const char* bufferEnd_; unsigned char readByte() { - if (bufferPointer == bufferEnd) { + if (bufferPointer_ == bufferEnd_) { int length; - if (!inputStream->Next(reinterpret_cast(&bufferPointer), &length)) { + if (!inputStream_->Next(reinterpret_cast(&bufferPointer_), &length)) { throw ParseError("bad read in DoubleColumnReader::next()"); } - bufferEnd = bufferPointer + length; + bufferEnd_ = bufferPointer_ + length; } - return static_cast(*(bufferPointer++)); + return static_cast(*(bufferPointer_++)); } template FloatType readDouble() { int64_t bits = 0; - if (bufferEnd - bufferPointer >= 8) { + if (bufferEnd_ - bufferPointer_ >= 8) { if (isLittleEndian) { - bits = *(reinterpret_cast(bufferPointer)); + bits = *(reinterpret_cast(bufferPointer_)); } else { - bits = static_cast(static_cast(bufferPointer[0])); - bits |= static_cast(static_cast(bufferPointer[1])) << 8; - bits |= static_cast(static_cast(bufferPointer[2])) << 16; - bits |= static_cast(static_cast(bufferPointer[3])) << 
24; - bits |= static_cast(static_cast(bufferPointer[4])) << 32; - bits |= static_cast(static_cast(bufferPointer[5])) << 40; - bits |= static_cast(static_cast(bufferPointer[6])) << 48; - bits |= static_cast(static_cast(bufferPointer[7])) << 56; + bits = static_cast(static_cast(bufferPointer_[0])); + bits |= static_cast(static_cast(bufferPointer_[1])) << 8; + bits |= static_cast(static_cast(bufferPointer_[2])) << 16; + bits |= static_cast(static_cast(bufferPointer_[3])) << 24; + bits |= static_cast(static_cast(bufferPointer_[4])) << 32; + bits |= static_cast(static_cast(bufferPointer_[5])) << 40; + bits |= static_cast(static_cast(bufferPointer_[6])) << 48; + bits |= static_cast(static_cast(bufferPointer_[7])) << 56; } - bufferPointer += 8; + bufferPointer_ += 8; } else { for (uint64_t i = 0; i < 8; i++) { bits |= static_cast(readByte()) << (i * 8); @@ -419,16 +419,16 @@ namespace orc { template FloatType readFloat() { int32_t bits = 0; - if (bufferEnd - bufferPointer >= 4) { + if (bufferEnd_ - bufferPointer_ >= 4) { if (isLittleEndian) { - bits = *(reinterpret_cast(bufferPointer)); + bits = *(reinterpret_cast(bufferPointer_)); } else { - bits = static_cast(bufferPointer[0]); - bits |= static_cast(bufferPointer[1]) << 8; - bits |= static_cast(bufferPointer[2]) << 16; - bits |= static_cast(bufferPointer[3]) << 24; + bits = static_cast(bufferPointer_[0]); + bits |= static_cast(bufferPointer_[1]) << 8; + bits |= static_cast(bufferPointer_[2]) << 16; + bits |= static_cast(bufferPointer_[3]) << 24; } - bufferPointer += 4; + bufferPointer_ += 4; } else { for (uint64_t i = 0; i < 4; i++) { bits |= readByte() << (i * 8); @@ -445,9 +445,9 @@ namespace orc { template DoubleColumnReader::DoubleColumnReader( const Type& type, StripeStreams& stripe) - : ColumnReader(type, stripe), bufferPointer(nullptr), bufferEnd(nullptr) { - inputStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (inputStream == nullptr) throw ParseError("DATA stream not found in Double 
column"); + : ColumnReader(type, stripe), bufferPointer_(nullptr), bufferEnd_(nullptr) { + inputStream_ = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (inputStream_ == nullptr) throw ParseError("DATA stream not found in Double column"); } template @@ -455,19 +455,19 @@ namespace orc { uint64_t numValues) { numValues = ColumnReader::skip(numValues); - if (static_cast(bufferEnd - bufferPointer) >= bytesPerValue * numValues) { - bufferPointer += bytesPerValue * numValues; + if (static_cast(bufferEnd_ - bufferPointer_) >= bytesPerValue_ * numValues) { + bufferPointer_ += bytesPerValue_ * numValues; } else { size_t sizeToSkip = - bytesPerValue * numValues - static_cast(bufferEnd - bufferPointer); + bytesPerValue_ * numValues - static_cast(bufferEnd_ - bufferPointer_); const size_t cap = static_cast(std::numeric_limits::max()); while (sizeToSkip != 0) { size_t step = sizeToSkip > cap ? cap : sizeToSkip; - inputStream->Skip(static_cast(step)); + inputStream_->Skip(static_cast(step)); sizeToSkip -= step; } - bufferEnd = nullptr; - bufferPointer = nullptr; + bufferEnd_ = nullptr; + bufferPointer_ = nullptr; } return numValues; @@ -507,10 +507,10 @@ namespace orc { uint64_t bufferNum = 0; if (isLittleEndian) { bufferNum = - std::min(numValues, static_cast(bufferEnd - bufferPointer) / bytesPerValue); - uint64_t bufferBytes = bufferNum * bytesPerValue; - memcpy(outArray, bufferPointer, bufferBytes); - bufferPointer += bufferBytes; + std::min(numValues, static_cast(bufferEnd_ - bufferPointer_) / bytesPerValue_); + uint64_t bufferBytes = bufferNum * bytesPerValue_; + memcpy(outArray, bufferPointer_, bufferBytes); + bufferPointer_ += bufferBytes; } for (size_t i = bufferNum; i < numValues; ++i) { outArray[i] = readDouble(); @@ -523,10 +523,10 @@ namespace orc { void DoubleColumnReader::seekToRowGroup( std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - inputStream->seek(positions.at(columnId)); + 
inputStream_->seek(positions.at(columnId)); // clear buffer state after seek - bufferEnd = nullptr; - bufferPointer = nullptr; + bufferEnd_ = nullptr; + bufferPointer_ = nullptr; } void readFully(char* buffer, int64_t bufferSize, SeekableInputStream* stream) { @@ -547,8 +547,8 @@ namespace orc { class StringDictionaryColumnReader : public ColumnReader { private: - std::shared_ptr dictionary; - std::unique_ptr rle; + std::shared_ptr dictionary_; + std::unique_ptr rle_; public: StringDictionaryColumnReader(const Type& type, StripeStreams& stipe); @@ -565,7 +565,7 @@ namespace orc { StringDictionaryColumnReader::StringDictionaryColumnReader(const Type& type, StripeStreams& stripe) - : ColumnReader(type, stripe), dictionary(new StringDictionary(stripe.getMemoryPool())) { + : ColumnReader(type, stripe), dictionary_(new StringDictionary(stripe.getMemoryPool())) { RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId).kind()); uint32_t dictSize = stripe.getEncoding(columnId).dictionary_size(); std::unique_ptr stream = @@ -573,15 +573,15 @@ namespace orc { if (stream == nullptr) { throw ParseError("DATA stream not found in StringDictionaryColumn"); } - rle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics); + rle_ = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics); stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, false); if (dictSize > 0 && stream == nullptr) { throw ParseError("LENGTH stream not found in StringDictionaryColumn"); } std::unique_ptr lengthDecoder = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics); - dictionary->dictionaryOffset.resize(dictSize + 1); - int64_t* lengthArray = dictionary->dictionaryOffset.data(); + dictionary_->dictionaryOffset.resize(dictSize + 1); + int64_t* lengthArray = dictionary_->dictionaryOffset.data(); lengthDecoder->next(lengthArray + 1, dictSize, nullptr); lengthArray[0] = 0; for (uint32_t i = 1; i < dictSize + 1; ++i) { @@ 
-591,13 +591,13 @@ namespace orc { lengthArray[i] += lengthArray[i - 1]; } int64_t blobSize = lengthArray[dictSize]; - dictionary->dictionaryBlob.resize(static_cast(blobSize)); + dictionary_->dictionaryBlob.resize(static_cast(blobSize)); std::unique_ptr blobStream = stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false); if (blobSize > 0 && blobStream == nullptr) { throw ParseError("DICTIONARY_DATA stream not found in StringDictionaryColumn"); } - readFully(dictionary->dictionaryBlob.data(), blobSize, blobStream.get()); + readFully(dictionary_->dictionaryBlob.data(), blobSize, blobStream.get()); } StringDictionaryColumnReader::~StringDictionaryColumnReader() { @@ -606,7 +606,7 @@ namespace orc { uint64_t StringDictionaryColumnReader::skip(uint64_t numValues) { numValues = ColumnReader::skip(numValues); - rle->skip(numValues); + rle_->skip(numValues); return numValues; } @@ -616,12 +616,12 @@ namespace orc { // update the notNull from the parent class notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; StringVectorBatch& byteBatch = dynamic_cast(rowBatch); - char* blob = dictionary->dictionaryBlob.data(); - int64_t* dictionaryOffsets = dictionary->dictionaryOffset.data(); + char* blob = dictionary_->dictionaryBlob.data(); + int64_t* dictionaryOffsets = dictionary_->dictionaryOffset.data(); char** outputStarts = byteBatch.data.data(); int64_t* outputLengths = byteBatch.length.data(); - rle->next(outputLengths, numValues, notNull); - uint64_t dictionaryCount = dictionary->dictionaryOffset.size() - 1; + rle_->next(outputLengths, numValues, notNull); + uint64_t dictionaryCount = dictionary_->dictionaryOffset.size() - 1; if (notNull) { for (uint64_t i = 0; i < numValues; ++i) { if (notNull[i]) { @@ -652,24 +652,24 @@ namespace orc { rowBatch.isEncoded = true; EncodedStringVectorBatch& batch = dynamic_cast(rowBatch); - batch.dictionary = this->dictionary; + batch.dictionary = this->dictionary_; // Length buffer is reused to save dictionary entry ids - rle->next(batch.index.data(), numValues, notNull); + rle_->next(batch.index.data(), numValues, notNull); } void StringDictionaryColumnReader::seekToRowGroup( std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); + rle_->seek(positions.at(columnId)); } class StringDirectColumnReader : public ColumnReader { private: - std::unique_ptr lengthRle; - std::unique_ptr blobStream; - const char* lastBuffer; - size_t lastBufferLength; + std::unique_ptr lengthRle_; + std::unique_ptr blobStream_; + const char* lastBuffer_; + size_t lastBufferLength_; /** * Compute the total length of the values. 
@@ -697,11 +697,11 @@ namespace orc { std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); if (stream == nullptr) throw ParseError("LENGTH stream not found in StringDirectColumn"); - lengthRle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics); - blobStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); - if (blobStream == nullptr) throw ParseError("DATA stream not found in StringDirectColumn"); - lastBuffer = nullptr; - lastBufferLength = 0; + lengthRle_ = createRleDecoder(std::move(stream), false, rleVersion, memoryPool, metrics); + blobStream_ = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); + if (blobStream_ == nullptr) throw ParseError("DATA stream not found in StringDirectColumn"); + lastBuffer_ = nullptr; + lastBufferLength_ = 0; } StringDirectColumnReader::~StringDirectColumnReader() { @@ -717,25 +717,25 @@ namespace orc { // read the lengths, so we know haw many bytes to skip while (done < numValues) { uint64_t step = std::min(BUFFER_SIZE, static_cast(numValues - done)); - lengthRle->next(buffer, step, nullptr); + lengthRle_->next(buffer, step, nullptr); totalBytes += computeSize(buffer, nullptr, step); done += step; } - if (totalBytes <= lastBufferLength) { + if (totalBytes <= lastBufferLength_) { // subtract the needed bytes from the ones left over - lastBufferLength -= totalBytes; - lastBuffer += totalBytes; + lastBufferLength_ -= totalBytes; + lastBuffer_ += totalBytes; } else { // move the stream forward after accounting for the buffered bytes - totalBytes -= lastBufferLength; + totalBytes -= lastBufferLength_; const size_t cap = static_cast(std::numeric_limits::max()); while (totalBytes != 0) { size_t step = totalBytes > cap ? 
cap : totalBytes; - blobStream->Skip(static_cast(step)); + blobStream_->Skip(static_cast(step)); totalBytes -= step; } - lastBufferLength = 0; - lastBuffer = nullptr; + lastBufferLength_ = 0; + lastBuffer_ = nullptr; } return numValues; } @@ -767,7 +767,7 @@ namespace orc { int64_t* lengthPtr = byteBatch.length.data(); // read the length vector - lengthRle->next(lengthPtr, numValues, notNull); + lengthRle_->next(lengthPtr, numValues, notNull); // figure out the total length of data we need from the blob stream const size_t totalLength = computeSize(lengthPtr, notNull, numValues); @@ -777,23 +777,23 @@ namespace orc { size_t bytesBuffered = 0; byteBatch.blob.resize(totalLength); char* ptr = byteBatch.blob.data(); - while (bytesBuffered + lastBufferLength < totalLength) { - memcpy(ptr + bytesBuffered, lastBuffer, lastBufferLength); - bytesBuffered += lastBufferLength; + while (bytesBuffered + lastBufferLength_ < totalLength) { + memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_); + bytesBuffered += lastBufferLength_; const void* readBuffer; int readLength; - if (!blobStream->Next(&readBuffer, &readLength)) { + if (!blobStream_->Next(&readBuffer, &readLength)) { throw ParseError("failed to read in StringDirectColumnReader.next"); } - lastBuffer = static_cast(readBuffer); - lastBufferLength = static_cast(readLength); + lastBuffer_ = static_cast(readBuffer); + lastBufferLength_ = static_cast(readLength); } if (bytesBuffered < totalLength) { size_t moreBytes = totalLength - bytesBuffered; - memcpy(ptr + bytesBuffered, lastBuffer, moreBytes); - lastBuffer += moreBytes; - lastBufferLength -= moreBytes; + memcpy(ptr + bytesBuffered, lastBuffer_, moreBytes); + lastBuffer_ += moreBytes; + lastBufferLength_ -= moreBytes; } size_t filledSlots = 0; @@ -818,16 +818,16 @@ namespace orc { void StringDirectColumnReader::seekToRowGroup( std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - blobStream->seek(positions.at(columnId)); - 
lengthRle->seek(positions.at(columnId)); + blobStream_->seek(positions.at(columnId)); + lengthRle_->seek(positions.at(columnId)); // clear buffer state after seek - lastBuffer = nullptr; - lastBufferLength = 0; + lastBuffer_ = nullptr; + lastBufferLength_ = 0; } class StructColumnReader : public ColumnReader { private: - std::vector> children; + std::vector> children_; public: StructColumnReader(const Type& type, StripeStreams& stripe, bool useTightNumericVector = false, @@ -857,7 +857,7 @@ namespace orc { for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) { const Type& child = *type.getSubtype(i); if (selectedColumns[static_cast(child.getColumnId())]) { - children.push_back( + children_.push_back( buildReader(child, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow)); } } @@ -872,7 +872,7 @@ namespace orc { uint64_t StructColumnReader::skip(uint64_t numValues) { numValues = ColumnReader::skip(numValues); - for (auto& ptr : children) { + for (auto& ptr : children_) { ptr->skip(numValues); } return numValues; @@ -893,7 +893,7 @@ namespace orc { ColumnReader::next(rowBatch, numValues, notNull); uint64_t i = 0; notNull = rowBatch.hasNulls ? 
rowBatch.notNull.data() : nullptr; - for (auto iter = children.begin(); iter != children.end(); ++iter, ++i) { + for (auto iter = children_.begin(); iter != children_.end(); ++iter, ++i) { if (encoded) { (*iter)->nextEncoded(*(dynamic_cast(rowBatch).fields[i]), numValues, notNull); @@ -907,15 +907,15 @@ namespace orc { std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - for (auto& ptr : children) { + for (auto& ptr : children_) { ptr->seekToRowGroup(positions); } } class ListColumnReader : public ColumnReader { private: - std::unique_ptr child; - std::unique_ptr rle; + std::unique_ptr child_; + std::unique_ptr rle_; public: ListColumnReader(const Type& type, StripeStreams& stipe, bool useTightNumericVector = false, @@ -945,10 +945,10 @@ namespace orc { std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); if (stream == nullptr) throw ParseError("LENGTH stream not found in List column"); - rle = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics); + rle_ = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics); const Type& childType = *type.getSubtype(0); if (selectedColumns[static_cast(childType.getColumnId())]) { - child = buildReader(childType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow); + child_ = buildReader(childType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow); } } @@ -958,7 +958,7 @@ namespace orc { uint64_t ListColumnReader::skip(uint64_t numValues) { numValues = ColumnReader::skip(numValues); - ColumnReader* childReader = child.get(); + ColumnReader* childReader = child_.get(); if (childReader) { const uint64_t BUFFER_SIZE = 1024; int64_t buffer[BUFFER_SIZE]; @@ -966,7 +966,7 @@ namespace orc { uint64_t lengthsRead = 0; while (lengthsRead < numValues) { uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); - rle->next(buffer, chunk, nullptr); + rle_->next(buffer, chunk, nullptr); for (size_t i = 0; i < chunk; ++i) { 
childrenElements += static_cast(buffer[i]); } @@ -974,7 +974,7 @@ namespace orc { } childReader->skip(childrenElements); } else { - rle->skip(numValues); + rle_->skip(numValues); } return numValues; } @@ -995,7 +995,7 @@ namespace orc { ListVectorBatch& listBatch = dynamic_cast(rowBatch); int64_t* offsets = listBatch.offsets.data(); notNull = listBatch.hasNulls ? listBatch.notNull.data() : nullptr; - rle->next(offsets, numValues, notNull); + rle_->next(offsets, numValues, notNull); uint64_t totalChildren = 0; if (notNull) { for (size_t i = 0; i < numValues; ++i) { @@ -1015,7 +1015,7 @@ namespace orc { } } offsets[numValues] = static_cast(totalChildren); - ColumnReader* childReader = child.get(); + ColumnReader* childReader = child_.get(); if (childReader) { if (encoded) { childReader->nextEncoded(*(listBatch.elements.get()), totalChildren, nullptr); @@ -1027,17 +1027,17 @@ namespace orc { void ListColumnReader::seekToRowGroup(std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - if (child.get()) { - child->seekToRowGroup(positions); + rle_->seek(positions.at(columnId)); + if (child_.get()) { + child_->seekToRowGroup(positions); } } class MapColumnReader : public ColumnReader { private: - std::unique_ptr keyReader; - std::unique_ptr elementReader; - std::unique_ptr rle; + std::unique_ptr keyReader_; + std::unique_ptr elementReader_; + std::unique_ptr rle_; public: MapColumnReader(const Type& type, StripeStreams& stipe, bool useTightNumericVector = false, @@ -1066,15 +1066,15 @@ namespace orc { std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true); if (stream == nullptr) throw ParseError("LENGTH stream not found in Map column"); - rle = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics); + rle_ = createRleDecoder(std::move(stream), false, vers, memoryPool, metrics); const Type& keyType = *type.getSubtype(0); if 
(selectedColumns[static_cast(keyType.getColumnId())]) { - keyReader = + keyReader_ = buildReader(keyType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow); } const Type& elementType = *type.getSubtype(1); if (selectedColumns[static_cast(elementType.getColumnId())]) { - elementReader = + elementReader_ = buildReader(elementType, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow); } } @@ -1085,8 +1085,8 @@ namespace orc { uint64_t MapColumnReader::skip(uint64_t numValues) { numValues = ColumnReader::skip(numValues); - ColumnReader* rawKeyReader = keyReader.get(); - ColumnReader* rawElementReader = elementReader.get(); + ColumnReader* rawKeyReader = keyReader_.get(); + ColumnReader* rawElementReader = elementReader_.get(); if (rawKeyReader || rawElementReader) { const uint64_t BUFFER_SIZE = 1024; int64_t buffer[BUFFER_SIZE]; @@ -1094,7 +1094,7 @@ namespace orc { uint64_t lengthsRead = 0; while (lengthsRead < numValues) { uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); - rle->next(buffer, chunk, nullptr); + rle_->next(buffer, chunk, nullptr); for (size_t i = 0; i < chunk; ++i) { childrenElements += static_cast(buffer[i]); } @@ -1107,7 +1107,7 @@ namespace orc { rawElementReader->skip(childrenElements); } } else { - rle->skip(numValues); + rle_->skip(numValues); } return numValues; } @@ -1128,7 +1128,7 @@ namespace orc { MapVectorBatch& mapBatch = dynamic_cast(rowBatch); int64_t* offsets = mapBatch.offsets.data(); notNull = mapBatch.hasNulls ? 
mapBatch.notNull.data() : nullptr; - rle->next(offsets, numValues, notNull); + rle_->next(offsets, numValues, notNull); uint64_t totalChildren = 0; if (notNull) { for (size_t i = 0; i < numValues; ++i) { @@ -1148,7 +1148,7 @@ namespace orc { } } offsets[numValues] = static_cast(totalChildren); - ColumnReader* rawKeyReader = keyReader.get(); + ColumnReader* rawKeyReader = keyReader_.get(); if (rawKeyReader) { if (encoded) { rawKeyReader->nextEncoded(*(mapBatch.keys.get()), totalChildren, nullptr); @@ -1156,7 +1156,7 @@ namespace orc { rawKeyReader->next(*(mapBatch.keys.get()), totalChildren, nullptr); } } - ColumnReader* rawElementReader = elementReader.get(); + ColumnReader* rawElementReader = elementReader_.get(); if (rawElementReader) { if (encoded) { rawElementReader->nextEncoded(*(mapBatch.elements.get()), totalChildren, nullptr); @@ -1168,21 +1168,21 @@ namespace orc { void MapColumnReader::seekToRowGroup(std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - if (keyReader.get()) { - keyReader->seekToRowGroup(positions); + rle_->seek(positions.at(columnId)); + if (keyReader_.get()) { + keyReader_->seekToRowGroup(positions); } - if (elementReader.get()) { - elementReader->seekToRowGroup(positions); + if (elementReader_.get()) { + elementReader_->seekToRowGroup(positions); } } class UnionColumnReader : public ColumnReader { private: - std::unique_ptr rle; - std::vector> childrenReader; - std::vector childrenCounts; - uint64_t numChildren; + std::unique_ptr rle_; + std::vector> childrenReader_; + std::vector childrenCounts_; + uint64_t numChildren_; public: UnionColumnReader(const Type& type, StripeStreams& stipe, bool useTightNumericVector = false, @@ -1205,20 +1205,20 @@ namespace orc { bool useTightNumericVector, bool throwOnSchemaEvolutionOverflow) : ColumnReader(type, stripe) { - numChildren = type.getSubtypeCount(); - childrenReader.resize(numChildren); - childrenCounts.resize(numChildren); + 
numChildren_ = type.getSubtypeCount(); + childrenReader_.resize(numChildren_); + childrenCounts_.resize(numChildren_); std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); if (stream == nullptr) throw ParseError("LENGTH stream not found in Union column"); - rle = createByteRleDecoder(std::move(stream), metrics); + rle_ = createByteRleDecoder(std::move(stream), metrics); // figure out which types are selected const std::vector selectedColumns = stripe.getSelectedColumns(); - for (unsigned int i = 0; i < numChildren; ++i) { + for (unsigned int i = 0; i < numChildren_; ++i) { const Type& child = *type.getSubtype(i); if (selectedColumns[static_cast(child.getColumnId())]) { - childrenReader[i] = + childrenReader_[i] = buildReader(child, stripe, useTightNumericVector, throwOnSchemaEvolutionOverflow); } } @@ -1229,19 +1229,19 @@ namespace orc { const uint64_t BUFFER_SIZE = 1024; char buffer[BUFFER_SIZE]; uint64_t lengthsRead = 0; - int64_t* counts = childrenCounts.data(); - memset(counts, 0, sizeof(int64_t) * numChildren); + int64_t* counts = childrenCounts_.data(); + memset(counts, 0, sizeof(int64_t) * numChildren_); while (lengthsRead < numValues) { uint64_t chunk = std::min(numValues - lengthsRead, BUFFER_SIZE); - rle->next(buffer, chunk, nullptr); + rle_->next(buffer, chunk, nullptr); for (size_t i = 0; i < chunk; ++i) { counts[static_cast(buffer[i])] += 1; } lengthsRead += chunk; } - for (size_t i = 0; i < numChildren; ++i) { - if (counts[i] != 0 && childrenReader[i] != nullptr) { - childrenReader[i]->skip(static_cast(counts[i])); + for (size_t i = 0; i < numChildren_; ++i) { + if (counts[i] != 0 && childrenReader_[i] != nullptr) { + childrenReader_[i]->skip(static_cast(counts[i])); } } return numValues; @@ -1262,11 +1262,11 @@ namespace orc { ColumnReader::next(rowBatch, numValues, notNull); UnionVectorBatch& unionBatch = dynamic_cast(rowBatch); uint64_t* offsets = unionBatch.offsets.data(); - int64_t* counts = childrenCounts.data(); - 
memset(counts, 0, sizeof(int64_t) * numChildren); + int64_t* counts = childrenCounts_.data(); + memset(counts, 0, sizeof(int64_t) * numChildren_); unsigned char* tags = unionBatch.tags.data(); notNull = unionBatch.hasNulls ? unionBatch.notNull.data() : nullptr; - rle->next(reinterpret_cast(tags), numValues, notNull); + rle_->next(reinterpret_cast(tags), numValues, notNull); // set the offsets for each row if (notNull) { for (size_t i = 0; i < numValues; ++i) { @@ -1280,13 +1280,13 @@ namespace orc { } } // read the right number of each child column - for (size_t i = 0; i < numChildren; ++i) { - if (childrenReader[i] != nullptr) { + for (size_t i = 0; i < numChildren_; ++i) { + if (childrenReader_[i] != nullptr) { if (encoded) { - childrenReader[i]->nextEncoded(*(unionBatch.children[i]), + childrenReader_[i]->nextEncoded(*(unionBatch.children[i]), static_cast(counts[i]), nullptr); } else { - childrenReader[i]->next(*(unionBatch.children[i]), static_cast(counts[i]), + childrenReader_[i]->next(*(unionBatch.children[i]), static_cast(counts[i]), nullptr); } } @@ -1296,10 +1296,10 @@ namespace orc { void UnionColumnReader::seekToRowGroup( std::unordered_map& positions) { ColumnReader::seekToRowGroup(positions); - rle->seek(positions.at(columnId)); - for (size_t i = 0; i < numChildren; ++i) { - if (childrenReader[i] != nullptr) { - childrenReader[i]->seekToRowGroup(positions); + rle_->seek(positions.at(columnId)); + for (size_t i = 0; i < numChildren_; ++i) { + if (childrenReader_[i] != nullptr) { + childrenReader_[i]->seekToRowGroup(positions); } } } @@ -1597,8 +1597,8 @@ namespace orc { class DecimalHive11ColumnReader : public Decimal64ColumnReader { private: - bool throwOnOverflow; - std::ostream* errorStream; + bool throwOnOverflow_; + std::ostream* errorStream_; /** * Read an Int128 from the stream and correct it to the desired scale. 
@@ -1647,8 +1647,8 @@ namespace orc { DecimalHive11ColumnReader::DecimalHive11ColumnReader(const Type& type, StripeStreams& stripe) : Decimal64ColumnReader(type, stripe) { scale = stripe.getForcedScaleOnHive11Decimal(); - throwOnOverflow = stripe.getThrowOnHive11DecimalOverflow(); - errorStream = stripe.getErrorStream(); + throwOnOverflow_ = stripe.getThrowOnHive11DecimalOverflow(); + errorStream_ = stripe.getErrorStream(); } DecimalHive11ColumnReader::~DecimalHive11ColumnReader() { @@ -1672,10 +1672,10 @@ namespace orc { for (size_t i = 0; i < numValues; ++i) { if (notNull[i]) { if (!readInt128(values[i], static_cast(scaleBuffer[i]))) { - if (throwOnOverflow) { + if (throwOnOverflow_) { throw ParseError("Hive 0.11 decimal was more than 38 digits."); } else { - *errorStream << "Warning: " + *errorStream_ << "Warning: " << "Hive 0.11 decimal with more than 38 digits " << "replaced by NULL.\n"; notNull[i] = false; @@ -1686,10 +1686,10 @@ namespace orc { } else { for (size_t i = 0; i < numValues; ++i) { if (!readInt128(values[i], static_cast(scaleBuffer[i]))) { - if (throwOnOverflow) { + if (throwOnOverflow_) { throw ParseError("Hive 0.11 decimal was more than 38 digits."); } else { - *errorStream << "Warning: " + *errorStream_ << "Warning: " << "Hive 0.11 decimal with more than 38 digits " << "replaced by NULL.\n"; batch.hasNulls = true; diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc index f24be1f0b26..3cf2ca6afba 100644 --- a/c++/src/ColumnWriter.cc +++ b/c++/src/ColumnWriter.cc @@ -33,24 +33,24 @@ namespace orc { class StreamsFactoryImpl : public StreamsFactory { public: StreamsFactoryImpl(const WriterOptions& writerOptions, OutputStream* outputStream) - : options(writerOptions), outStream(outputStream) {} + : options_(writerOptions), outStream_(outputStream) {} virtual std::unique_ptr createStream( proto::Stream_Kind kind) const override; private: - const WriterOptions& options; - OutputStream* outStream; + const WriterOptions& options_; + 
OutputStream* outStream_; }; std::unique_ptr StreamsFactoryImpl::createStream(proto::Stream_Kind) const { // In the future, we can decide compression strategy and modifier // based on stream kind. But for now we just use the setting from // WriterOption - return createCompressor(options.getCompression(), outStream, options.getCompressionStrategy(), + return createCompressor(options_.getCompression(), outStream_, options_.getCompressionStrategy(), // BufferedOutputStream initial capacity - options.getOutputBufferCapacity(), options.getCompressionBlockSize(), - *options.getMemoryPool(), options.getWriterMetrics()); + options_.getOutputBufferCapacity(), options_.getCompressionBlockSize(), + *options_.getMemoryPool(), options_.getWriterMetrics()); } std::unique_ptr createStreamsFactory(const WriterOptions& options, @@ -282,7 +282,7 @@ namespace orc { virtual void reset() override; private: - std::vector> children; + std::vector> children_; }; StructColumnWriter::StructColumnWriter(const Type& type, const StreamsFactory& factory, @@ -290,7 +290,7 @@ namespace orc { : ColumnWriter(type, factory, options) { for (unsigned int i = 0; i < type.getSubtypeCount(); ++i) { const Type& child = *type.getSubtype(i); - children.push_back(buildWriter(child, factory, options)); + children_.push_back(buildWriter(child, factory, options)); } if (enableIndex) { @@ -307,8 +307,8 @@ namespace orc { ColumnWriter::add(rowBatch, offset, numValues, incomingMask); const char* notNull = structBatch->hasNulls ? 
structBatch->notNull.data() + offset : nullptr; - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->add(*structBatch->fields[i], offset, numValues, notNull); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->add(*structBatch->fields[i], offset, numValues, notNull); } // update stats @@ -330,22 +330,22 @@ namespace orc { void StructColumnWriter::flush(std::vector& streams) { ColumnWriter::flush(streams); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->flush(streams); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->flush(streams); } } void StructColumnWriter::writeIndex(std::vector& streams) const { ColumnWriter::writeIndex(streams); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeIndex(streams); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->writeIndex(streams); } } uint64_t StructColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - for (uint32_t i = 0; i < children.size(); ++i) { - size += children[i]->getEstimatedSize(); + for (uint32_t i = 0; i < children_.size(); ++i) { + size += children_[i]->getEstimatedSize(); } return size; } @@ -355,62 +355,62 @@ namespace orc { encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); encoding.set_dictionary_size(0); encodings.push_back(encoding); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getColumnEncoding(encodings); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->getColumnEncoding(encodings); } } void StructColumnWriter::getStripeStatistics(std::vector& stats) const { ColumnWriter::getStripeStatistics(stats); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getStripeStatistics(stats); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->getStripeStatistics(stats); } } void StructColumnWriter::mergeStripeStatsIntoFileStats() { ColumnWriter::mergeStripeStatsIntoFileStats(); - for (uint32_t i = 0; i < 
children.size(); ++i) { - children[i]->mergeStripeStatsIntoFileStats(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->mergeStripeStatsIntoFileStats(); } } void StructColumnWriter::getFileStatistics(std::vector& stats) const { ColumnWriter::getFileStatistics(stats); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getFileStatistics(stats); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->getFileStatistics(stats); } } void StructColumnWriter::mergeRowGroupStatsIntoStripeStats() { ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeRowGroupStatsIntoStripeStats(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->mergeRowGroupStatsIntoStripeStats(); } } void StructColumnWriter::createRowIndexEntry() { ColumnWriter::createRowIndexEntry(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->createRowIndexEntry(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->createRowIndexEntry(); } } void StructColumnWriter::reset() { ColumnWriter::reset(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->reset(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->reset(); } } void StructColumnWriter::writeDictionary() { - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeDictionary(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->writeDictionary(); } } @@ -435,17 +435,17 @@ namespace orc { std::unique_ptr rleEncoder; private: - RleVersion rleVersion; + RleVersion rleVersion_; }; template IntegerColumnWriter::IntegerColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options) - : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()) { + : ColumnWriter(type, factory, options), rleVersion_(options.getRleVersion()) { std::unique_ptr dataStream = 
factory.createStream(proto::Stream_Kind_DATA); - rleEncoder = createRleEncoder(std::move(dataStream), true, rleVersion, memPool, + rleEncoder = createRleEncoder(std::move(dataStream), true, rleVersion_, memPool, options.getAlignedBitpacking()); if (enableIndex) { @@ -512,7 +512,7 @@ namespace orc { void IntegerColumnWriter::getColumnEncoding( std::vector& encodings) const { proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_kind(RleVersionMapper(rleVersion_)); encoding.set_dictionary_size(0); if (enableBloomFilter) { encoding.set_bloom_encoding(BloomFilterVersion::UTF8); @@ -543,7 +543,7 @@ namespace orc { virtual void recordPosition() const override; private: - std::unique_ptr byteRleEncoder; + std::unique_ptr byteRleEncoder_; }; template @@ -552,7 +552,7 @@ namespace orc { : ColumnWriter(type, factory, options) { std::unique_ptr dataStream = factory.createStream(proto::Stream_Kind_DATA); - byteRleEncoder = createByteRleEncoder(std::move(dataStream)); + byteRleEncoder_ = createByteRleEncoder(std::move(dataStream)); if (enableIndex) { recordPosition(); @@ -581,7 +581,7 @@ namespace orc { for (uint64_t i = 0; i < numValues; ++i) { byteData[i] = static_cast(data[i]); } - byteRleEncoder->add(byteData, numValues, notNull); + byteRleEncoder_->add(byteData, numValues, notNull); uint64_t count = 0; for (uint64_t i = 0; i < numValues; ++i) { @@ -606,14 +606,14 @@ namespace orc { proto::Stream stream; stream.set_kind(proto::Stream_Kind_DATA); stream.set_column(static_cast(columnId)); - stream.set_length(byteRleEncoder->flush()); + stream.set_length(byteRleEncoder_->flush()); streams.push_back(stream); } template uint64_t ByteColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - size += byteRleEncoder->getBufferSize(); + size += byteRleEncoder_->getBufferSize(); return size; } @@ -632,7 +632,7 @@ namespace orc { template void ByteColumnWriter::recordPosition() const { 
ColumnWriter::recordPosition(); - byteRleEncoder->recordPosition(rowIndexPosition.get()); + byteRleEncoder_->recordPosition(rowIndexPosition.get()); } template @@ -653,7 +653,7 @@ namespace orc { virtual void recordPosition() const override; private: - std::unique_ptr rleEncoder; + std::unique_ptr rleEncoder_; }; template @@ -663,7 +663,7 @@ namespace orc { : ColumnWriter(type, factory, options) { std::unique_ptr dataStream = factory.createStream(proto::Stream_Kind_DATA); - rleEncoder = createBooleanRleEncoder(std::move(dataStream)); + rleEncoder_ = createBooleanRleEncoder(std::move(dataStream)); if (enableIndex) { recordPosition(); @@ -694,7 +694,7 @@ namespace orc { for (uint64_t i = 0; i < numValues; ++i) { byteData[i] = static_cast(data[i]); } - rleEncoder->add(byteData, numValues, notNull); + rleEncoder_->add(byteData, numValues, notNull); uint64_t count = 0; for (uint64_t i = 0; i < numValues; ++i) { @@ -719,14 +719,14 @@ namespace orc { proto::Stream stream; stream.set_kind(proto::Stream_Kind_DATA); stream.set_column(static_cast(columnId)); - stream.set_length(rleEncoder->flush()); + stream.set_length(rleEncoder_->flush()); streams.push_back(stream); } template uint64_t BooleanColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - size += rleEncoder->getBufferSize(); + size += rleEncoder_->getBufferSize(); return size; } @@ -745,7 +745,7 @@ namespace orc { template void BooleanColumnWriter::recordPosition() const { ColumnWriter::recordPosition(); - rleEncoder->recordPosition(rowIndexPosition.get()); + rleEncoder_->recordPosition(rowIndexPosition.get()); } template @@ -766,9 +766,9 @@ namespace orc { virtual void recordPosition() const override; private: - bool isFloat; - std::unique_ptr dataStream; - DataBuffer buffer; + bool isFloat_; + std::unique_ptr dataStream_; + DataBuffer buffer_; }; template @@ -777,10 +777,10 @@ namespace orc { const WriterOptions& options, bool isFloatType) : ColumnWriter(type, factory, 
options), - isFloat(isFloatType), - buffer(*options.getMemoryPool()) { - dataStream.reset(new AppendOnlyBufferedStream(factory.createStream(proto::Stream_Kind_DATA))); - buffer.resize(isFloat ? 4 : 8); + isFloat_(isFloatType), + buffer_(*options.getMemoryPool()) { + dataStream_.reset(new AppendOnlyBufferedStream(factory.createStream(proto::Stream_Kind_DATA))); + buffer_.resize(isFloat_ ? 4 : 8); if (enableIndex) { recordPosition(); @@ -816,17 +816,17 @@ namespace orc { const ValueType* doubleData = dblBatch->data.data() + offset; const char* notNull = dblBatch->hasNulls ? dblBatch->notNull.data() + offset : nullptr; - size_t bytes = isFloat ? 4 : 8; - char* data = buffer.data(); + size_t bytes = isFloat_ ? 4 : 8; + char* data = buffer_.data(); uint64_t count = 0; for (uint64_t i = 0; i < numValues; ++i) { if (!notNull || notNull[i]) { - if (isFloat) { + if (isFloat_) { encodeFloatNum(static_cast(doubleData[i]), data); } else { encodeFloatNum(static_cast(doubleData[i]), data); } - dataStream->write(data, bytes); + dataStream_->write(data, bytes); ++count; if (enableBloomFilter) { bloomFilter->addDouble(static_cast(doubleData[i])); @@ -847,14 +847,14 @@ namespace orc { proto::Stream stream; stream.set_kind(proto::Stream_Kind_DATA); stream.set_column(static_cast(columnId)); - stream.set_length(dataStream->flush()); + stream.set_length(dataStream_->flush()); streams.push_back(stream); } template uint64_t FloatingColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - size += dataStream->getSize(); + size += dataStream_->getSize(); return size; } @@ -873,7 +873,7 @@ namespace orc { template void FloatingColumnWriter::recordPosition() const { ColumnWriter::recordPosition(); - dataStream->recordPosition(rowIndexPosition.get()); + dataStream_->recordPosition(rowIndexPosition.get()); } /** @@ -887,7 +887,7 @@ namespace orc { size_t length; }; - SortedStringDictionary() : totalLength(0) {} + SortedStringDictionary() : totalLength_(0) {} 
// insert a new string into dictionary, return its insertion order size_t insert(const char* data, size_t len); @@ -920,29 +920,29 @@ namespace orc { } }; - std::map dict; - std::vector> data; - uint64_t totalLength; + std::map dict_; + std::vector> data_; + uint64_t totalLength_; // use friend class here to avoid being bothered by const function calls friend class StringColumnWriter; friend class CharColumnWriter; friend class VarCharColumnWriter; // store indexes of insertion order in the dictionary for not-null rows - std::vector idxInDictBuffer; + std::vector idxInDictBuffer_; }; // insert a new string into dictionary, return its insertion order size_t SortedStringDictionary::insert(const char* str, size_t len) { - auto ret = dict.insert({DictEntry(str, len), dict.size()}); + auto ret = dict_.insert({DictEntry(str, len), dict_.size()}); if (ret.second) { // make a copy to internal storage - data.push_back(std::vector(len)); - memcpy(data.back().data(), str, len); + data_.push_back(std::vector(len)); + memcpy(data_.back().data(), str, len); // update dictionary entry to link pointer to internal storage DictEntry* entry = const_cast(&(ret.first->first)); - entry->data = data.back().data(); - totalLength += len; + entry->data = data_.back().data(); + totalLength_ += len; } return ret.first->second; } @@ -950,7 +950,7 @@ namespace orc { // write dictionary data & length to output buffer void SortedStringDictionary::flush(AppendOnlyBufferedStream* dataStream, RleEncoder* lengthEncoder) const { - for (auto it = dict.cbegin(); it != dict.cend(); ++it) { + for (auto it = dict_.cbegin(); it != dict_.cend(); ++it) { dataStream->write(it->first.data, it->first.length); lengthEncoder->write(static_cast(it->first.length)); } @@ -968,9 +968,9 @@ namespace orc { */ void SortedStringDictionary::reorder(std::vector& idxBuffer) const { // iterate the dictionary to get mapping from insertion order to value order - std::vector mapping(dict.size()); + std::vector 
mapping(dict_.size()); size_t dictIdx = 0; - for (auto it = dict.cbegin(); it != dict.cend(); ++it) { + for (auto it = dict_.cbegin(); it != dict_.cend(); ++it) { mapping[it->second] = dictIdx++; } @@ -983,26 +983,26 @@ namespace orc { // get dict entries in insertion order void SortedStringDictionary::getEntriesInInsertionOrder( std::vector& entries) const { - entries.resize(dict.size()); - for (auto it = dict.cbegin(); it != dict.cend(); ++it) { + entries.resize(dict_.size()); + for (auto it = dict_.cbegin(); it != dict_.cend(); ++it) { entries[it->second] = &(it->first); } } // return count of entries size_t SortedStringDictionary::size() const { - return dict.size(); + return dict_.size(); } // return total length of strings in the dictioanry uint64_t SortedStringDictionary::length() const { - return totalLength; + return totalLength_; } void SortedStringDictionary::clear() { - totalLength = 0; - data.clear(); - dict.clear(); + totalLength_ = 0; + data_.clear(); + dict_.clear(); } class StringColumnWriter : public ColumnWriter { @@ -1123,7 +1123,7 @@ namespace orc { const size_t len = static_cast(length[i]); if (useDictionary) { size_t index = dictionary.insert(data[i], len); - dictionary.idxInDictBuffer.push_back(static_cast(index)); + dictionary.idxInDictBuffer_.push_back(static_cast(index)); } else { directDataStream->write(data[i], len); } @@ -1184,7 +1184,7 @@ namespace orc { } else { size += dictionary.length(); size += dictionary.size() * sizeof(int32_t); - size += dictionary.idxInDictBuffer.size() * sizeof(int32_t); + size += dictionary.idxInDictBuffer_.size() * sizeof(int32_t); if (useCompression) { size /= 3; // estimated ratio is 3:1 } @@ -1215,7 +1215,7 @@ namespace orc { directLengthEncoder->recordPosition(rowIndexPosition.get()); } else { if (enableIndex) { - startOfRowGroups.push_back(dictionary.idxInDictBuffer.size()); + startOfRowGroups.push_back(dictionary.idxInDictBuffer_.size()); } } } @@ -1223,7 +1223,7 @@ namespace orc { bool 
StringColumnWriter::checkDictionaryKeyRatio() { if (!doneDictionaryCheck) { useDictionary = dictionary.size() <= - static_cast(static_cast(dictionary.idxInDictBuffer.size()) * + static_cast(static_cast(dictionary.idxInDictBuffer_.size()) * dictSizeThreshold); doneDictionaryCheck = true; } @@ -1244,7 +1244,7 @@ namespace orc { ColumnWriter::reset(); dictionary.clear(); - dictionary.idxInDictBuffer.resize(0); + dictionary.idxInDictBuffer_.resize(0); startOfRowGroups.clear(); startOfRowGroups.push_back(0); } @@ -1277,7 +1277,7 @@ namespace orc { dictStream.reset(nullptr); dictionary.clear(); - dictionary.idxInDictBuffer.clear(); + dictionary.idxInDictBuffer_.clear(); startOfRowGroups.clear(); } @@ -1295,10 +1295,10 @@ namespace orc { dictionary.flush(dictStream.get(), dictLengthEncoder.get()); // convert index from insertion order to dictionary order - dictionary.reorder(dictionary.idxInDictBuffer); + dictionary.reorder(dictionary.idxInDictBuffer_); // write data sequences - int64_t* data = dictionary.idxInDictBuffer.data(); + int64_t* data = dictionary.idxInDictBuffer_.data(); if (enableIndex) { size_t prevOffset = 0; for (size_t i = 0; i < startOfRowGroups.size(); ++i) { @@ -1319,10 +1319,10 @@ namespace orc { prevOffset = offset; } - dictDataEncoder->add(data + prevOffset, dictionary.idxInDictBuffer.size() - prevOffset, + dictDataEncoder->add(data + prevOffset, dictionary.idxInDictBuffer_.size() - prevOffset, nullptr); } else { - dictDataEncoder->add(data, dictionary.idxInDictBuffer.size(), nullptr); + dictDataEncoder->add(data, dictionary.idxInDictBuffer_.size(), nullptr); } } } @@ -1345,9 +1345,9 @@ namespace orc { // store each length of the data into a vector const SortedStringDictionary::DictEntry* dictEntry = nullptr; - for (uint64_t i = 0; i != dictionary.idxInDictBuffer.size(); ++i) { + for (uint64_t i = 0; i != dictionary.idxInDictBuffer_.size(); ++i) { // write one row data in direct encoding - dictEntry = 
entries[static_cast(dictionary.idxInDictBuffer[i])]; + dictEntry = entries[static_cast(dictionary.idxInDictBuffer_[i])]; directDataStream->write(dictEntry->data, dictEntry->length); directLengthEncoder->write(static_cast(dictEntry->length)); } @@ -1428,18 +1428,18 @@ namespace orc { public: CharColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options) : StringColumnWriter(type, factory, options), - maxLength(type.getMaximumLength()), - padBuffer(*options.getMemoryPool()) { + maxLength_(type.getMaximumLength()), + padBuffer_(*options.getMemoryPool()) { // utf-8 is currently 4 bytes long, but it could be up to 6 - padBuffer.resize(maxLength * 6); + padBuffer_.resize(maxLength_ * 6); } virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues, const char* incomingMask) override; private: - uint64_t maxLength; - DataBuffer padBuffer; + uint64_t maxLength_; + DataBuffer padBuffer_; }; void CharColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues, @@ -1467,22 +1467,22 @@ namespace orc { const char* charData = nullptr; uint64_t originLength = static_cast(length[i]); uint64_t charLength = Utf8Utils::charLength(data[i], originLength); - if (charLength >= maxLength) { + if (charLength >= maxLength_) { charData = data[i]; length[i] = - static_cast(Utf8Utils::truncateBytesTo(maxLength, data[i], originLength)); + static_cast(Utf8Utils::truncateBytesTo(maxLength_, data[i], originLength)); } else { - charData = padBuffer.data(); + charData = padBuffer_.data(); // the padding is exactly 1 byte per char - length[i] = length[i] + static_cast(maxLength - charLength); - memcpy(padBuffer.data(), data[i], originLength); - memset(padBuffer.data() + originLength, ' ', + length[i] = length[i] + static_cast(maxLength_ - charLength); + memcpy(padBuffer_.data(), data[i], originLength); + memset(padBuffer_.data() + originLength, ' ', static_cast(length[i]) - originLength); } if (useDictionary) { size_t 
index = dictionary.insert(charData, static_cast(length[i])); - dictionary.idxInDictBuffer.push_back(static_cast(index)); + dictionary.idxInDictBuffer_.push_back(static_cast(index)); } else { directDataStream->write(charData, static_cast(length[i])); } @@ -1509,7 +1509,7 @@ namespace orc { public: VarCharColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options) - : StringColumnWriter(type, factory, options), maxLength(type.getMaximumLength()) { + : StringColumnWriter(type, factory, options), maxLength_(type.getMaximumLength()) { // PASS } @@ -1517,7 +1517,7 @@ namespace orc { const char* incomingMask) override; private: - uint64_t maxLength; + uint64_t maxLength_; }; void VarCharColumnWriter::add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t numValues, @@ -1543,12 +1543,12 @@ namespace orc { for (uint64_t i = 0; i < numValues; ++i) { if (!notNull || notNull[i]) { uint64_t itemLength = - Utf8Utils::truncateBytesTo(maxLength, data[i], static_cast(length[i])); + Utf8Utils::truncateBytesTo(maxLength_, data[i], static_cast(length[i])); length[i] = static_cast(itemLength); if (useDictionary) { size_t index = dictionary.insert(data[i], static_cast(length[i])); - dictionary.idxInDictBuffer.push_back(static_cast(index)); + dictionary.idxInDictBuffer_.push_back(static_cast(index)); } else { directDataStream->write(data[i], static_cast(length[i])); } @@ -1642,24 +1642,24 @@ namespace orc { std::unique_ptr secRleEncoder, nanoRleEncoder; private: - RleVersion rleVersion; - const Timezone* timezone; - const bool isUTC; + RleVersion rleVersion_; + const Timezone* timezone_; + const bool isUTC_; }; TimestampColumnWriter::TimestampColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options, bool isInstantType) : ColumnWriter(type, factory, options), - rleVersion(options.getRleVersion()), - timezone(isInstantType ? 
&getTimezoneByName("GMT") : &options.getTimezone()), - isUTC(isInstantType || options.getTimezoneName() == "GMT") { + rleVersion_(options.getRleVersion()), + timezone_(isInstantType ? &getTimezoneByName("GMT") : &options.getTimezone()), + isUTC_(isInstantType || options.getTimezoneName() == "GMT") { std::unique_ptr dataStream = factory.createStream(proto::Stream_Kind_DATA); std::unique_ptr secondaryStream = factory.createStream(proto::Stream_Kind_SECONDARY); - secRleEncoder = createRleEncoder(std::move(dataStream), true, rleVersion, memPool, + secRleEncoder = createRleEncoder(std::move(dataStream), true, rleVersion_, memPool, options.getAlignedBitpacking()); - nanoRleEncoder = createRleEncoder(std::move(secondaryStream), false, rleVersion, memPool, + nanoRleEncoder = createRleEncoder(std::move(secondaryStream), false, rleVersion_, memPool, options.getAlignedBitpacking()); if (enableIndex) { @@ -1712,8 +1712,8 @@ namespace orc { if (notNull == nullptr || notNull[i]) { // TimestampVectorBatch already stores data in UTC int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000; - if (!isUTC) { - millsUTC = timezone->convertToUTC(secs[i]) * 1000 + nanos[i] / 1000000; + if (!isUTC_) { + millsUTC = timezone_->convertToUTC(secs[i]) * 1000 + nanos[i] / 1000000; } ++count; if (enableBloomFilter) { @@ -1725,7 +1725,7 @@ namespace orc { secs[i] += 1; } - secs[i] -= timezone->getEpoch(); + secs[i] -= timezone_->getEpoch(); nanos[i] = formatNano(nanos[i]); } } @@ -1764,7 +1764,7 @@ namespace orc { void TimestampColumnWriter::getColumnEncoding( std::vector& encodings) const { proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_kind(RleVersionMapper(rleVersion_)); encoding.set_dictionary_size(0); if (enableBloomFilter) { encoding.set_bloom_encoding(BloomFilterVersion::UTF8); @@ -1855,7 +1855,7 @@ namespace orc { std::unique_ptr scaleEncoder; private: - char buffer[10]; + char buffer_[10]; }; 
Decimal64ColumnWriter::Decimal64ColumnWriter(const Type& type, const StreamsFactory& factory, @@ -1897,7 +1897,7 @@ namespace orc { for (uint64_t i = 0; i < numValues; ++i) { if (!notNull || notNull[i]) { int64_t val = zigZag(values[i]); - char* data = buffer; + char* data = buffer_; while (true) { if ((val & ~0x7f) == 0) { *(data++) = (static_cast(val)); @@ -1908,7 +1908,7 @@ namespace orc { val = (static_cast(val) >> 7); } } - valueStream->write(buffer, static_cast(data - buffer)); + valueStream->write(buffer_, static_cast(data - buffer_)); ++count; if (enableBloomFilter) { std::string decimal = Decimal(values[i], static_cast(scale)).toString(true); @@ -2080,7 +2080,7 @@ namespace orc { const char* incomingMask) override; private: - char buffer[20]; + char buffer_[20]; }; Decimal128ColumnWriter::Decimal128ColumnWriter(const Type& type, const StreamsFactory& factory, @@ -2126,7 +2126,7 @@ namespace orc { for (uint64_t i = 0; i < numValues; ++i) { if (!notNull || notNull[i]) { Int128 val = zigZagInt128(values[i]); - char* data = buffer; + char* data = buffer_; while (true) { if ((val & ~0x7f) == 0) { *(data++) = (static_cast(val.getLowBits())); @@ -2136,7 +2136,7 @@ namespace orc { val >>= 7; } } - valueStream->write(buffer, static_cast(data - buffer)); + valueStream->write(buffer_, static_cast(data - buffer_)); ++count; if (enableBloomFilter) { @@ -2187,21 +2187,21 @@ namespace orc { virtual void reset() override; private: - std::unique_ptr lengthEncoder; - RleVersion rleVersion; - std::unique_ptr child; + std::unique_ptr lengthEncoder_; + RleVersion rleVersion_; + std::unique_ptr child_; }; ListColumnWriter::ListColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options) - : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()) { + : ColumnWriter(type, factory, options), rleVersion_(options.getRleVersion()) { std::unique_ptr lengthStream = factory.createStream(proto::Stream_Kind_LENGTH); - lengthEncoder = 
createRleEncoder(std::move(lengthStream), false, rleVersion, memPool, + lengthEncoder_ = createRleEncoder(std::move(lengthStream), false, rleVersion_, memPool, options.getAlignedBitpacking()); if (type.getSubtypeCount() == 1) { - child = buildWriter(*type.getSubtype(0), factory, options); + child_ = buildWriter(*type.getSubtype(0), factory, options); } if (enableIndex) { @@ -2239,10 +2239,10 @@ namespace orc { } // unnecessary to deal with null as elements are packed together - if (child.get()) { - child->add(*listBatch->elements, elemOffset, totalNumValues, nullptr); + if (child_.get()) { + child_->add(*listBatch->elements, elemOffset, totalNumValues, nullptr); } - lengthEncoder->add(offsets, numValues, notNull); + lengthEncoder_->add(offsets, numValues, notNull); if (enableIndex) { if (!notNull) { @@ -2272,93 +2272,93 @@ namespace orc { proto::Stream stream; stream.set_kind(proto::Stream_Kind_LENGTH); stream.set_column(static_cast(columnId)); - stream.set_length(lengthEncoder->flush()); + stream.set_length(lengthEncoder_->flush()); streams.push_back(stream); - if (child.get()) { - child->flush(streams); + if (child_.get()) { + child_->flush(streams); } } void ListColumnWriter::writeIndex(std::vector& streams) const { ColumnWriter::writeIndex(streams); - if (child.get()) { - child->writeIndex(streams); + if (child_.get()) { + child_->writeIndex(streams); } } uint64_t ListColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - if (child.get()) { - size += lengthEncoder->getBufferSize(); - size += child->getEstimatedSize(); + if (child_.get()) { + size += lengthEncoder_->getBufferSize(); + size += child_->getEstimatedSize(); } return size; } void ListColumnWriter::getColumnEncoding(std::vector& encodings) const { proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_kind(RleVersionMapper(rleVersion_)); encoding.set_dictionary_size(0); if (enableBloomFilter) { 
encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); - if (child.get()) { - child->getColumnEncoding(encodings); + if (child_.get()) { + child_->getColumnEncoding(encodings); } } void ListColumnWriter::getStripeStatistics(std::vector& stats) const { ColumnWriter::getStripeStatistics(stats); - if (child.get()) { - child->getStripeStatistics(stats); + if (child_.get()) { + child_->getStripeStatistics(stats); } } void ListColumnWriter::mergeStripeStatsIntoFileStats() { ColumnWriter::mergeStripeStatsIntoFileStats(); - if (child.get()) { - child->mergeStripeStatsIntoFileStats(); + if (child_.get()) { + child_->mergeStripeStatsIntoFileStats(); } } void ListColumnWriter::getFileStatistics(std::vector& stats) const { ColumnWriter::getFileStatistics(stats); - if (child.get()) { - child->getFileStatistics(stats); + if (child_.get()) { + child_->getFileStatistics(stats); } } void ListColumnWriter::mergeRowGroupStatsIntoStripeStats() { ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - if (child.get()) { - child->mergeRowGroupStatsIntoStripeStats(); + if (child_.get()) { + child_->mergeRowGroupStatsIntoStripeStats(); } } void ListColumnWriter::createRowIndexEntry() { ColumnWriter::createRowIndexEntry(); - if (child.get()) { - child->createRowIndexEntry(); + if (child_.get()) { + child_->createRowIndexEntry(); } } void ListColumnWriter::recordPosition() const { ColumnWriter::recordPosition(); - lengthEncoder->recordPosition(rowIndexPosition.get()); + lengthEncoder_->recordPosition(rowIndexPosition.get()); } void ListColumnWriter::reset() { ColumnWriter::reset(); - if (child) { - child->reset(); + if (child_) { + child_->reset(); } } void ListColumnWriter::writeDictionary() { - if (child) { - child->writeDictionary(); + if (child_) { + child_->writeDictionary(); } } @@ -2395,26 +2395,26 @@ namespace orc { virtual void reset() override; private: - std::unique_ptr keyWriter; - std::unique_ptr elemWriter; - std::unique_ptr lengthEncoder; - 
RleVersion rleVersion; + std::unique_ptr keyWriter_; + std::unique_ptr elemWriter_; + std::unique_ptr lengthEncoder_; + RleVersion rleVersion_; }; MapColumnWriter::MapColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options) - : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()) { + : ColumnWriter(type, factory, options), rleVersion_(options.getRleVersion()) { std::unique_ptr lengthStream = factory.createStream(proto::Stream_Kind_LENGTH); - lengthEncoder = createRleEncoder(std::move(lengthStream), false, rleVersion, memPool, + lengthEncoder_ = createRleEncoder(std::move(lengthStream), false, rleVersion_, memPool, options.getAlignedBitpacking()); if (type.getSubtypeCount() > 0) { - keyWriter = buildWriter(*type.getSubtype(0), factory, options); + keyWriter_ = buildWriter(*type.getSubtype(0), factory, options); } if (type.getSubtypeCount() > 1) { - elemWriter = buildWriter(*type.getSubtype(1), factory, options); + elemWriter_ = buildWriter(*type.getSubtype(1), factory, options); } if (enableIndex) { @@ -2451,14 +2451,14 @@ namespace orc { offsets[i] = offsets[i + 1] - offsets[i]; } - lengthEncoder->add(offsets, numValues, notNull); + lengthEncoder_->add(offsets, numValues, notNull); // unnecessary to deal with null as keys and values are packed together - if (keyWriter.get()) { - keyWriter->add(*mapBatch->keys, elemOffset, totalNumValues, nullptr); + if (keyWriter_.get()) { + keyWriter_->add(*mapBatch->keys, elemOffset, totalNumValues, nullptr); } - if (elemWriter.get()) { - elemWriter->add(*mapBatch->elements, elemOffset, totalNumValues, nullptr); + if (elemWriter_.get()) { + elemWriter_->add(*mapBatch->elements, elemOffset, totalNumValues, nullptr); } if (enableIndex) { @@ -2489,126 +2489,126 @@ namespace orc { proto::Stream stream; stream.set_kind(proto::Stream_Kind_LENGTH); stream.set_column(static_cast(columnId)); - stream.set_length(lengthEncoder->flush()); + stream.set_length(lengthEncoder_->flush()); 
streams.push_back(stream); - if (keyWriter.get()) { - keyWriter->flush(streams); + if (keyWriter_.get()) { + keyWriter_->flush(streams); } - if (elemWriter.get()) { - elemWriter->flush(streams); + if (elemWriter_.get()) { + elemWriter_->flush(streams); } } void MapColumnWriter::writeIndex(std::vector& streams) const { ColumnWriter::writeIndex(streams); - if (keyWriter.get()) { - keyWriter->writeIndex(streams); + if (keyWriter_.get()) { + keyWriter_->writeIndex(streams); } - if (elemWriter.get()) { - elemWriter->writeIndex(streams); + if (elemWriter_.get()) { + elemWriter_->writeIndex(streams); } } uint64_t MapColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - size += lengthEncoder->getBufferSize(); - if (keyWriter.get()) { - size += keyWriter->getEstimatedSize(); + size += lengthEncoder_->getBufferSize(); + if (keyWriter_.get()) { + size += keyWriter_->getEstimatedSize(); } - if (elemWriter.get()) { - size += elemWriter->getEstimatedSize(); + if (elemWriter_.get()) { + size += elemWriter_->getEstimatedSize(); } return size; } void MapColumnWriter::getColumnEncoding(std::vector& encodings) const { proto::ColumnEncoding encoding; - encoding.set_kind(RleVersionMapper(rleVersion)); + encoding.set_kind(RleVersionMapper(rleVersion_)); encoding.set_dictionary_size(0); if (enableBloomFilter) { encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); - if (keyWriter.get()) { - keyWriter->getColumnEncoding(encodings); + if (keyWriter_.get()) { + keyWriter_->getColumnEncoding(encodings); } - if (elemWriter.get()) { - elemWriter->getColumnEncoding(encodings); + if (elemWriter_.get()) { + elemWriter_->getColumnEncoding(encodings); } } void MapColumnWriter::getStripeStatistics(std::vector& stats) const { ColumnWriter::getStripeStatistics(stats); - if (keyWriter.get()) { - keyWriter->getStripeStatistics(stats); + if (keyWriter_.get()) { + keyWriter_->getStripeStatistics(stats); } - if (elemWriter.get()) { - 
elemWriter->getStripeStatistics(stats); + if (elemWriter_.get()) { + elemWriter_->getStripeStatistics(stats); } } void MapColumnWriter::mergeStripeStatsIntoFileStats() { ColumnWriter::mergeStripeStatsIntoFileStats(); - if (keyWriter.get()) { - keyWriter->mergeStripeStatsIntoFileStats(); + if (keyWriter_.get()) { + keyWriter_->mergeStripeStatsIntoFileStats(); } - if (elemWriter.get()) { - elemWriter->mergeStripeStatsIntoFileStats(); + if (elemWriter_.get()) { + elemWriter_->mergeStripeStatsIntoFileStats(); } } void MapColumnWriter::getFileStatistics(std::vector& stats) const { ColumnWriter::getFileStatistics(stats); - if (keyWriter.get()) { - keyWriter->getFileStatistics(stats); + if (keyWriter_.get()) { + keyWriter_->getFileStatistics(stats); } - if (elemWriter.get()) { - elemWriter->getFileStatistics(stats); + if (elemWriter_.get()) { + elemWriter_->getFileStatistics(stats); } } void MapColumnWriter::mergeRowGroupStatsIntoStripeStats() { ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - if (keyWriter.get()) { - keyWriter->mergeRowGroupStatsIntoStripeStats(); + if (keyWriter_.get()) { + keyWriter_->mergeRowGroupStatsIntoStripeStats(); } - if (elemWriter.get()) { - elemWriter->mergeRowGroupStatsIntoStripeStats(); + if (elemWriter_.get()) { + elemWriter_->mergeRowGroupStatsIntoStripeStats(); } } void MapColumnWriter::createRowIndexEntry() { ColumnWriter::createRowIndexEntry(); - if (keyWriter.get()) { - keyWriter->createRowIndexEntry(); + if (keyWriter_.get()) { + keyWriter_->createRowIndexEntry(); } - if (elemWriter.get()) { - elemWriter->createRowIndexEntry(); + if (elemWriter_.get()) { + elemWriter_->createRowIndexEntry(); } } void MapColumnWriter::recordPosition() const { ColumnWriter::recordPosition(); - lengthEncoder->recordPosition(rowIndexPosition.get()); + lengthEncoder_->recordPosition(rowIndexPosition.get()); } void MapColumnWriter::reset() { ColumnWriter::reset(); - if (keyWriter) { - keyWriter->reset(); + if (keyWriter_) { + keyWriter_->reset(); } - 
if (elemWriter) { - elemWriter->reset(); + if (elemWriter_) { + elemWriter_->reset(); } } void MapColumnWriter::writeDictionary() { - if (keyWriter) { - keyWriter->writeDictionary(); + if (keyWriter_) { + keyWriter_->writeDictionary(); } - if (elemWriter) { - elemWriter->writeDictionary(); + if (elemWriter_) { + elemWriter_->writeDictionary(); } } @@ -2645,8 +2645,8 @@ namespace orc { virtual void reset() override; private: - std::unique_ptr rleEncoder; - std::vector> children; + std::unique_ptr rleEncoder_; + std::vector> children_; }; UnionColumnWriter::UnionColumnWriter(const Type& type, const StreamsFactory& factory, @@ -2654,10 +2654,10 @@ namespace orc { : ColumnWriter(type, factory, options) { std::unique_ptr dataStream = factory.createStream(proto::Stream_Kind_DATA); - rleEncoder = createByteRleEncoder(std::move(dataStream)); + rleEncoder_ = createByteRleEncoder(std::move(dataStream)); for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) { - children.push_back(buildWriter(*type.getSubtype(i), factory, options)); + children_.push_back(buildWriter(*type.getSubtype(i), factory, options)); } if (enableIndex) { @@ -2678,8 +2678,8 @@ namespace orc { unsigned char* tags = unionBatch->tags.data() + offset; uint64_t* offsets = unionBatch->offsets.data() + offset; - std::vector childOffset(children.size(), -1); - std::vector childLength(children.size(), 0); + std::vector childOffset(children_.size(), -1); + std::vector childLength(children_.size(), 0); for (uint64_t i = 0; i != numValues; ++i) { if (childOffset[tags[i]] == -1) { @@ -2688,11 +2688,11 @@ namespace orc { ++childLength[tags[i]]; } - rleEncoder->add(reinterpret_cast(tags), numValues, notNull); + rleEncoder_->add(reinterpret_cast(tags), numValues, notNull); - for (uint32_t i = 0; i < children.size(); ++i) { + for (uint32_t i = 0; i < children_.size(); ++i) { if (childLength[i] > 0) { - children[i]->add(*unionBatch->children[i], static_cast(childOffset[i]), + children_[i]->add(*unionBatch->children[i], 
static_cast(childOffset[i]), childLength[i], nullptr); } } @@ -2725,26 +2725,26 @@ namespace orc { proto::Stream stream; stream.set_kind(proto::Stream_Kind_DATA); stream.set_column(static_cast(columnId)); - stream.set_length(rleEncoder->flush()); + stream.set_length(rleEncoder_->flush()); streams.push_back(stream); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->flush(streams); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->flush(streams); } } void UnionColumnWriter::writeIndex(std::vector& streams) const { ColumnWriter::writeIndex(streams); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeIndex(streams); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->writeIndex(streams); } } uint64_t UnionColumnWriter::getEstimatedSize() const { uint64_t size = ColumnWriter::getEstimatedSize(); - size += rleEncoder->getBufferSize(); - for (uint32_t i = 0; i < children.size(); ++i) { - size += children[i]->getEstimatedSize(); + size += rleEncoder_->getBufferSize(); + for (uint32_t i = 0; i < children_.size(); ++i) { + size += children_[i]->getEstimatedSize(); } return size; } @@ -2757,61 +2757,61 @@ namespace orc { encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getColumnEncoding(encodings); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->getColumnEncoding(encodings); } } void UnionColumnWriter::getStripeStatistics(std::vector& stats) const { ColumnWriter::getStripeStatistics(stats); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getStripeStatistics(stats); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->getStripeStatistics(stats); } } void UnionColumnWriter::mergeStripeStatsIntoFileStats() { ColumnWriter::mergeStripeStatsIntoFileStats(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeStripeStatsIntoFileStats(); + 
for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->mergeStripeStatsIntoFileStats(); } } void UnionColumnWriter::getFileStatistics(std::vector& stats) const { ColumnWriter::getFileStatistics(stats); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->getFileStatistics(stats); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->getFileStatistics(stats); } } void UnionColumnWriter::mergeRowGroupStatsIntoStripeStats() { ColumnWriter::mergeRowGroupStatsIntoStripeStats(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->mergeRowGroupStatsIntoStripeStats(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->mergeRowGroupStatsIntoStripeStats(); } } void UnionColumnWriter::createRowIndexEntry() { ColumnWriter::createRowIndexEntry(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->createRowIndexEntry(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->createRowIndexEntry(); } } void UnionColumnWriter::recordPosition() const { ColumnWriter::recordPosition(); - rleEncoder->recordPosition(rowIndexPosition.get()); + rleEncoder_->recordPosition(rowIndexPosition.get()); } void UnionColumnWriter::reset() { ColumnWriter::reset(); - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->reset(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->reset(); } } void UnionColumnWriter::writeDictionary() { - for (uint32_t i = 0; i < children.size(); ++i) { - children[i]->writeDictionary(); + for (uint32_t i = 0; i < children_.size(); ++i) { + children_[i]->writeDictionary(); } } diff --git a/c++/src/ColumnWriter.hh b/c++/src/ColumnWriter.hh index f21ffd6f834..8afd1eb72c7 100644 --- a/c++/src/ColumnWriter.hh +++ b/c++/src/ColumnWriter.hh @@ -53,14 +53,14 @@ namespace orc { public: virtual ~RowIndexPositionRecorder() override; - RowIndexPositionRecorder(proto::RowIndexEntry& entry) : rowIndexEntry(entry) {} + 
RowIndexPositionRecorder(proto::RowIndexEntry& entry) : rowIndexEntry_(entry) {} virtual void add(uint64_t pos) override { - rowIndexEntry.add_positions(pos); + rowIndexEntry_.add_positions(pos); } private: - proto::RowIndexEntry& rowIndexEntry; + proto::RowIndexEntry& rowIndexEntry_; }; /** diff --git a/c++/src/Common.cc b/c++/src/Common.cc index cf2ff27ef14..52efa12d94a 100644 --- a/c++/src/Common.cc +++ b/c++/src/Common.cc @@ -133,11 +133,11 @@ namespace orc { } std::string FileVersion::toString() const { - if (majorVersion == 1 && minorVersion == 9999) { + if (majorVersion_ == 1 && minorVersion_ == 9999) { return "UNSTABLE-PRE-2.0"; } std::stringstream ss; - ss << majorVersion << '.' << minorVersion; + ss << majorVersion_ << '.' << minorVersion_; return ss.str(); } diff --git a/c++/src/Compression.cc b/c++/src/Compression.cc index 94be774ab48..4002276e18a 100644 --- a/c++/src/Compression.cc +++ b/c++/src/Compression.cc @@ -245,7 +245,7 @@ namespace orc { private: void init(); void end(); - z_stream strm; + z_stream strm_; }; ZlibCompressionStream::ZlibCompressionStream(OutputStream* outStream, int compressionLevel, @@ -256,12 +256,12 @@ namespace orc { } uint64_t ZlibCompressionStream::doStreamingCompression() { - if (deflateReset(&strm) != Z_OK) { + if (deflateReset(&strm_) != Z_OK) { throw std::runtime_error("Failed to reset inflate."); } - strm.avail_in = static_cast(bufferSize); - strm.next_in = rawInputBuffer.data(); + strm_.avail_in = static_cast(bufferSize); + strm_.next_in = rawInputBuffer.data(); do { if (outputPosition >= outputSize) { @@ -270,11 +270,11 @@ namespace orc { } outputPosition = 0; } - strm.next_out = reinterpret_cast(outputBuffer + outputPosition); - strm.avail_out = static_cast(outputSize - outputPosition); + strm_.next_out = reinterpret_cast(outputBuffer + outputPosition); + strm_.avail_out = static_cast(outputSize - outputPosition); - int ret = deflate(&strm, Z_FINISH); - outputPosition = outputSize - static_cast(strm.avail_out); + 
int ret = deflate(&strm_, Z_FINISH); + outputPosition = outputSize - static_cast(strm_.avail_out); if (ret == Z_STREAM_END) { break; @@ -283,9 +283,9 @@ namespace orc { } else { throw std::runtime_error("Failed to deflate input data."); } - } while (strm.avail_out == 0); + } while (strm_.avail_out == 0); - return strm.total_out; + return strm_.total_out; } std::string ZlibCompressionStream::getName() const { @@ -299,18 +299,18 @@ namespace orc { #endif void ZlibCompressionStream::init() { - strm.zalloc = nullptr; - strm.zfree = nullptr; - strm.opaque = nullptr; - strm.next_in = nullptr; + strm_.zalloc = nullptr; + strm_.zfree = nullptr; + strm_.opaque = nullptr; + strm_.next_in = nullptr; - if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) { + if (deflateInit2(&strm_, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) { throw std::runtime_error("Error while calling deflateInit2() for zlib."); } } void ZlibCompressionStream::end() { - (void)deflateEnd(&strm); + (void)deflateEnd(&strm_); } DIAGNOSTIC_PUSH @@ -399,9 +399,9 @@ namespace orc { }; DecompressionStream::DecompressionStream(std::unique_ptr inStream, - size_t bufferSize, MemoryPool& _pool, - ReaderMetrics* _metrics) - : pool(_pool), + size_t bufferSize, MemoryPool& pool, + ReaderMetrics* metrics) + : pool(pool), input(std::move(inStream)), outputDataBuffer(pool, bufferSize), state(DECOMPRESS_HEADER), @@ -416,7 +416,7 @@ namespace orc { headerPosition(0), inputBufferStartPosition(0), bytesReturned(0), - metrics(_metrics) {} + metrics(metrics) {} std::string DecompressionStream::getStreamName() const { return input->getName(); @@ -622,7 +622,7 @@ namespace orc { virtual void NextDecompress(const void** data, int* size, size_t availableSize) override; private: - z_stream zstream; + z_stream zstream_; }; DIAGNOSTIC_PUSH @@ -632,17 +632,17 @@ namespace orc { #endif ZlibDecompressionStream::ZlibDecompressionStream(std::unique_ptr inStream, - size_t bufferSize, MemoryPool& _pool, 
- ReaderMetrics* _metrics) - : DecompressionStream(std::move(inStream), bufferSize, _pool, _metrics) { - zstream.next_in = nullptr; - zstream.avail_in = 0; - zstream.zalloc = nullptr; - zstream.zfree = nullptr; - zstream.opaque = nullptr; - zstream.next_out = reinterpret_cast(outputDataBuffer.data()); - zstream.avail_out = static_cast(outputDataBuffer.capacity()); - int64_t result = inflateInit2(&zstream, -15); + size_t bufferSize, MemoryPool& pool, + ReaderMetrics* metrics) + : DecompressionStream(std::move(inStream), bufferSize, pool, metrics) { + zstream_.next_in = nullptr; + zstream_.avail_in = 0; + zstream_.zalloc = nullptr; + zstream_.zfree = nullptr; + zstream_.opaque = nullptr; + zstream_.next_out = reinterpret_cast(outputDataBuffer.data()); + zstream_.avail_out = static_cast(outputDataBuffer.capacity()); + int64_t result = inflateInit2(&zstream_, -15); switch (result) { case Z_OK: break; @@ -660,7 +660,7 @@ namespace orc { DIAGNOSTIC_POP ZlibDecompressionStream::~ZlibDecompressionStream() { - int64_t result = inflateEnd(&zstream); + int64_t result = inflateEnd(&zstream_); if (result != Z_OK) { // really can't throw in destructors std::cout << "Error in ~ZlibDecompressionStream() " << result << "\n"; @@ -668,19 +668,19 @@ namespace orc { } void ZlibDecompressionStream::NextDecompress(const void** data, int* size, size_t availableSize) { - zstream.next_in = reinterpret_cast(const_cast(inputBuffer)); - zstream.avail_in = static_cast(availableSize); + zstream_.next_in = reinterpret_cast(const_cast(inputBuffer)); + zstream_.avail_in = static_cast(availableSize); outputBuffer = outputDataBuffer.data(); - zstream.next_out = reinterpret_cast(const_cast(outputBuffer)); - zstream.avail_out = static_cast(outputDataBuffer.capacity()); - if (inflateReset(&zstream) != Z_OK) { + zstream_.next_out = reinterpret_cast(const_cast(outputBuffer)); + zstream_.avail_out = static_cast(outputDataBuffer.capacity()); + if (inflateReset(&zstream_) != Z_OK) { throw std::logic_error( 
"Bad inflateReset in " "ZlibDecompressionStream::NextDecompress"); } int64_t result; do { - result = inflate(&zstream, availableSize == remainingLength ? Z_FINISH : Z_SYNC_FLUSH); + result = inflate(&zstream_, availableSize == remainingLength ? Z_FINISH : Z_SYNC_FLUSH); switch (result) { case Z_OK: remainingLength -= availableSize; @@ -688,8 +688,8 @@ namespace orc { readBuffer(true); availableSize = std::min(static_cast(inputBufferEnd - inputBuffer), remainingLength); - zstream.next_in = reinterpret_cast(const_cast(inputBuffer)); - zstream.avail_in = static_cast(availableSize); + zstream_.next_in = reinterpret_cast(const_cast(inputBuffer)); + zstream_.avail_in = static_cast(availableSize); break; case Z_STREAM_END: break; @@ -711,7 +711,7 @@ namespace orc { "ZlibDecompressionStream::NextDecompress"); } } while (result != Z_STREAM_END); - *size = static_cast(outputDataBuffer.capacity() - zstream.avail_out); + *size = static_cast(outputDataBuffer.capacity() - zstream_.avail_out); *data = outputBuffer; outputBufferLength = 0; outputBuffer += *size; @@ -742,14 +742,14 @@ namespace orc { private: // may need to stitch together multiple input buffers; // to give snappy a contiguous block - DataBuffer inputDataBuffer; + DataBuffer inputDataBuffer_; }; BlockDecompressionStream::BlockDecompressionStream(std::unique_ptr inStream, - size_t blockSize, MemoryPool& _pool, - ReaderMetrics* _metrics) - : DecompressionStream(std::move(inStream), blockSize, _pool, _metrics), - inputDataBuffer(pool, blockSize) {} + size_t blockSize, MemoryPool& pool, + ReaderMetrics* metrics) + : DecompressionStream(std::move(inStream), blockSize, pool, metrics), + inputDataBuffer_(pool, blockSize) {} void BlockDecompressionStream::NextDecompress(const void** data, int* size, size_t availableSize) { @@ -759,18 +759,18 @@ namespace orc { inputBuffer += availableSize; } else { // Did not read enough from input. 
- if (inputDataBuffer.capacity() < remainingLength) { - inputDataBuffer.resize(remainingLength); + if (inputDataBuffer_.capacity() < remainingLength) { + inputDataBuffer_.resize(remainingLength); } - ::memcpy(inputDataBuffer.data(), inputBuffer, availableSize); + ::memcpy(inputDataBuffer_.data(), inputBuffer, availableSize); inputBuffer += availableSize; - compressed = inputDataBuffer.data(); + compressed = inputDataBuffer_.data(); for (size_t pos = availableSize; pos < remainingLength;) { readBuffer(true); size_t avail = std::min(static_cast(inputBufferEnd - inputBuffer), remainingLength - pos); - ::memcpy(inputDataBuffer.data() + pos, inputBuffer, avail); + ::memcpy(inputDataBuffer_.data() + pos, inputBuffer, avail); pos += avail; inputBuffer += avail; } @@ -788,8 +788,8 @@ namespace orc { class SnappyDecompressionStream : public BlockDecompressionStream { public: SnappyDecompressionStream(std::unique_ptr inStream, size_t blockSize, - MemoryPool& _pool, ReaderMetrics* _metrics) - : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) { + MemoryPool& pool, ReaderMetrics* metrics) + : BlockDecompressionStream(std::move(inStream), blockSize, pool, metrics) { // PASS } @@ -804,10 +804,10 @@ namespace orc { size_t maxOutputLength) override; }; - uint64_t SnappyDecompressionStream::decompress(const char* _input, uint64_t length, char* output, + uint64_t SnappyDecompressionStream::decompress(const char* input, uint64_t length, char* output, size_t maxOutputLength) { size_t outLength; - if (!snappy::GetUncompressedLength(_input, length, &outLength)) { + if (!snappy::GetUncompressedLength(input, length, &outLength)) { throw ParseError("SnappyDecompressionStream choked on corrupt input"); } @@ -815,7 +815,7 @@ namespace orc { throw std::logic_error("Snappy length exceeds block size"); } - if (!snappy::RawUncompress(_input, length, output)) { + if (!snappy::RawUncompress(input, length, output)) { throw ParseError("SnappyDecompressionStream choked on 
corrupt input"); } return outLength; @@ -824,8 +824,8 @@ namespace orc { class LzoDecompressionStream : public BlockDecompressionStream { public: LzoDecompressionStream(std::unique_ptr inStream, size_t blockSize, - MemoryPool& _pool, ReaderMetrics* _metrics) - : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) { + MemoryPool& pool, ReaderMetrics* metrics) + : BlockDecompressionStream(std::move(inStream), blockSize, pool, metrics) { // PASS } @@ -848,8 +848,8 @@ namespace orc { class Lz4DecompressionStream : public BlockDecompressionStream { public: Lz4DecompressionStream(std::unique_ptr inStream, size_t blockSize, - MemoryPool& _pool, ReaderMetrics* _metrics) - : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) { + MemoryPool& pool, ReaderMetrics* metrics) + : BlockDecompressionStream(std::move(inStream), blockSize, pool, metrics) { // PASS } @@ -967,12 +967,12 @@ namespace orc { private: void init(); void end(); - LZ4_stream_t* state; + LZ4_stream_t* state_; }; uint64_t Lz4CompressionSteam::doBlockCompression() { int result = LZ4_compress_fast_extState( - static_cast(state), reinterpret_cast(rawInputBuffer.data()), + static_cast(state_), reinterpret_cast(rawInputBuffer.data()), reinterpret_cast(compressorBuffer.data()), bufferSize, static_cast(compressorBuffer.size()), level); if (result == 0) { @@ -982,15 +982,15 @@ namespace orc { } void Lz4CompressionSteam::init() { - state = LZ4_createStream(); - if (!state) { + state_ = LZ4_createStream(); + if (!state_) { throw std::runtime_error("Error while allocating state for lz4."); } } void Lz4CompressionSteam::end() { - (void)LZ4_freeStream(state); - state = nullptr; + (void)LZ4_freeStream(state_); + state_ = nullptr; } /** @@ -1055,11 +1055,11 @@ namespace orc { private: void init(); void end(); - ZSTD_CCtx* cctx; + ZSTD_CCtx* cctx_; }; uint64_t ZSTDCompressionStream::doBlockCompression() { - return ZSTD_compressCCtx(cctx, compressorBuffer.data(), 
compressorBuffer.size(), + return ZSTD_compressCCtx(cctx_, compressorBuffer.data(), compressorBuffer.size(), rawInputBuffer.data(), static_cast(bufferSize), level); } @@ -1070,15 +1070,15 @@ namespace orc { #endif void ZSTDCompressionStream::init() { - cctx = ZSTD_createCCtx(); - if (!cctx) { + cctx_ = ZSTD_createCCtx(); + if (!cctx_) { throw std::runtime_error("Error while calling ZSTD_createCCtx() for zstd."); } } void ZSTDCompressionStream::end() { - (void)ZSTD_freeCCtx(cctx); - cctx = nullptr; + (void)ZSTD_freeCCtx(cctx_); + cctx_ = nullptr; } DIAGNOSTIC_PUSH @@ -1089,8 +1089,8 @@ namespace orc { class ZSTDDecompressionStream : public BlockDecompressionStream { public: ZSTDDecompressionStream(std::unique_ptr inStream, size_t blockSize, - MemoryPool& _pool, ReaderMetrics* _metrics) - : BlockDecompressionStream(std::move(inStream), blockSize, _pool, _metrics) { + MemoryPool& pool, ReaderMetrics* metrics) + : BlockDecompressionStream(std::move(inStream), blockSize, pool, metrics) { this->init(); } @@ -1111,13 +1111,13 @@ namespace orc { private: void init(); void end(); - ZSTD_DCtx* dctx; + ZSTD_DCtx* dctx_; }; uint64_t ZSTDDecompressionStream::decompress(const char* inputPtr, uint64_t length, char* output, size_t maxOutputLength) { return static_cast( - ZSTD_decompressDCtx(dctx, output, maxOutputLength, inputPtr, length)); + ZSTD_decompressDCtx(dctx_, output, maxOutputLength, inputPtr, length)); } DIAGNOSTIC_PUSH @@ -1127,15 +1127,15 @@ namespace orc { #endif void ZSTDDecompressionStream::init() { - dctx = ZSTD_createDCtx(); - if (!dctx) { + dctx_ = ZSTD_createDCtx(); + if (!dctx_) { throw std::runtime_error("Error while calling ZSTD_createDCtx() for zstd."); } } void ZSTDDecompressionStream::end() { - (void)ZSTD_freeDCtx(dctx); - dctx = nullptr; + (void)ZSTD_freeDCtx(dctx_); + dctx_ = nullptr; } DIAGNOSTIC_PUSH diff --git a/c++/src/ConvertColumnReader.cc b/c++/src/ConvertColumnReader.cc index d94d1861a14..27cd567ad6a 100644 --- a/c++/src/ConvertColumnReader.cc 
+++ b/c++/src/ConvertColumnReader.cc @@ -23,9 +23,9 @@ namespace orc { // Assume that we are using tight numeric vector batch using BooleanVectorBatch = ByteVectorBatch; - ConvertColumnReader::ConvertColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ColumnReader(_readType, stripe), readType(_readType), throwOnOverflow(_throwOnOverflow) { + ConvertColumnReader::ConvertColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ColumnReader(readTypeValue, stripe), readType(readTypeValue), throwOnOverflow(throwOnOverflowValue) { reader = buildReader(fileType, stripe, /*useTightNumericVector=*/true, /*throwOnOverflow=*/false, /*convertToReadType*/ false); data = @@ -135,9 +135,9 @@ namespace orc { template class NumericConvertColumnReader : public ConvertColumnReader { public: - NumericConvertColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, - bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {} + NumericConvertColumnReader(const Type& readTypeValue, const Type& fileType, StripeStreams& stripe, + bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { ConvertColumnReader::next(rowBatch, numValues, notNull); @@ -164,9 +164,9 @@ namespace orc { class NumericConvertColumnReader : public ConvertColumnReader { public: - NumericConvertColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, - bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {} + NumericConvertColumnReader(const Type& readTypeValue, const Type& fileType, StripeStreams& stripe, + bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) {} void next(ColumnVectorBatch& 
rowBatch, uint64_t numValues, char* notNull) override { ConvertColumnReader::next(rowBatch, numValues, notNull); @@ -188,9 +188,9 @@ namespace orc { class ConvertToStringVariantColumnReader : public ConvertColumnReader { public: - ConvertToStringVariantColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {} + ConvertToStringVariantColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override; @@ -225,19 +225,19 @@ namespace orc { class BooleanToStringVariantColumnReader : public ConvertToStringVariantColumnReader { public: - BooleanToStringVariantColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ConvertToStringVariantColumnReader(_readType, fileType, stripe, _throwOnOverflow) { - trueValue = "TRUE"; - falseValue = "FALSE"; - if (readType.getKind() == CHAR || readType.getKind() == VARCHAR) { - if (readType.getMaximumLength() < 5) { + BooleanToStringVariantColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertToStringVariantColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) { + trueValue_ = "TRUE"; + falseValue_ = "FALSE"; + if (readTypeValue.getKind() == CHAR || readTypeValue.getKind() == VARCHAR) { + if (readTypeValue.getMaximumLength() < 5) { throw SchemaEvolutionError("Invalid maximum length for boolean type: " + - std::to_string(readType.getMaximumLength())); + std::to_string(readTypeValue.getMaximumLength())); } - if (readType.getKind() == CHAR) { - trueValue.resize(readType.getMaximumLength(), ' '); - falseValue.resize(readType.getMaximumLength(), ' '); + if 
(readTypeValue.getKind() == CHAR) { + trueValue_.resize(readTypeValue.getMaximumLength(), ' '); + falseValue_.resize(readTypeValue.getMaximumLength(), ' '); } } } @@ -245,8 +245,8 @@ namespace orc { uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) override; private: - std::string trueValue; - std::string falseValue; + std::string trueValue_; + std::string falseValue_; }; uint64_t BooleanToStringVariantColumnReader::convertToStrBuffer(ColumnVectorBatch& rowBatch, @@ -257,7 +257,7 @@ namespace orc { // cast the bool value to string for (uint64_t i = 0; i < numValues; ++i) { if (!rowBatch.hasNulls || rowBatch.notNull[i]) { - strBuffer[i] = (srcBatch.data[i] ? trueValue : falseValue); + strBuffer[i] = (srcBatch.data[i] ? trueValue_ : falseValue_); size += strBuffer[i].size(); } } @@ -267,9 +267,9 @@ namespace orc { template class NumericToStringVariantColumnReader : public ConvertToStringVariantColumnReader { public: - NumericToStringVariantColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ConvertToStringVariantColumnReader(_readType, fileType, stripe, _throwOnOverflow) {} + NumericToStringVariantColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertToStringVariantColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) {} uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) override; }; @@ -321,13 +321,13 @@ namespace orc { template class NumericToDecimalColumnReader : public ConvertColumnReader { public: - NumericToDecimalColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, - bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) { - precision = static_cast(readType.getPrecision()); - scale = static_cast(readType.getScale()); + NumericToDecimalColumnReader(const Type& readTypeValue, const Type& fileType, 
StripeStreams& stripe, + bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) { + precision_ = static_cast(readTypeValue.getPrecision()); + scale_ = static_cast(readTypeValue.getScale()); bool overflow = false; - upperBound = scaleUpInt128ByPowerOfTen(1, precision, overflow); + upperBound_ = scaleUpInt128ByPowerOfTen(1, precision_, overflow); } void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { @@ -335,8 +335,8 @@ namespace orc { const auto& srcBatch = *SafeCastBatchTo(data.get()); auto& dstBatch = *SafeCastBatchTo(&rowBatch); - dstBatch.precision = precision; - dstBatch.scale = scale; + dstBatch.precision = precision_; + dstBatch.scale = scale_; for (uint64_t i = 0; i < numValues; ++i) { if (!rowBatch.hasNulls || rowBatch.notNull[i]) { if constexpr (isFloatingFileType) { @@ -351,7 +351,7 @@ namespace orc { private: template void convertDoubleToDecimal(ReadTypeBatch& dstBatch, uint64_t idx, SrcType value) { - const auto result = convertDecimal(value, precision, scale); + const auto result = convertDecimal(value, precision_, scale_); Int128 i128 = result.second; if (result.first) { handleOverflow(dstBatch, idx, throwOnOverflow); @@ -372,7 +372,7 @@ namespace orc { template void convertIntegerToDecimal(ReadTypeBatch& dstBatch, uint64_t idx, SrcType value) { int fromScale = 0; - auto result = convertDecimal(value, fromScale, precision, scale); + auto result = convertDecimal(value, fromScale, precision_, scale_); if (result.first) { handleOverflow(dstBatch, idx, throwOnOverflow); } else { @@ -388,18 +388,18 @@ namespace orc { } } - int32_t precision; - int32_t scale; - int64_t scaleMultiplier; - Int128 upperBound; + int32_t precision_; + int32_t scale_; + int64_t scaleMultiplier_; + Int128 upperBound_; }; class ConvertToTimestampColumnReader : public ConvertColumnReader { public: - ConvertToTimestampColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, 
bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow), - readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? &getTimezoneByName("GMT") + ConvertToTimestampColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue), + readerTimezone(readTypeValue.getKind() == TIMESTAMP_INSTANT ? &getTimezoneByName("GMT") : &stripe.getReaderTimezone()), needConvertTimezone(readerTimezone != &getTimezoneByName("GMT")) {} @@ -419,9 +419,9 @@ namespace orc { template class NumericToTimestampColumnReader : public ConvertToTimestampColumnReader { public: - NumericToTimestampColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ConvertToTimestampColumnReader(_readType, fileType, stripe, _throwOnOverflow) {} + NumericToTimestampColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertToTimestampColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { ConvertToTimestampColumnReader::next(rowBatch, numValues, notNull); @@ -469,14 +469,14 @@ namespace orc { template class DecimalToNumericColumnReader : public ConvertColumnReader { public: - DecimalToNumericColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, - bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) { - precision = fileType.getPrecision(); - scale = fileType.getScale(); - factor = 1; - for (int i = 0; i < scale; i++) { - factor *= 10; + DecimalToNumericColumnReader(const Type& readTypeValue, const Type& fileType, StripeStreams& stripe, + bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) { + precision_ = 
fileType.getPrecision(); + scale_ = fileType.getScale(); + factor_ = 1; + for (int i = 0; i < scale_; i++) { + factor_ *= 10; } } @@ -500,7 +500,7 @@ namespace orc { void convertDecimalToInteger(ReadTypeBatch& dstBatch, uint64_t idx, const FileTypeBatch& srcBatch) { using FileType = decltype(srcBatch.values[idx]); - Int128 result = scaleDownInt128ByPowerOfTen(srcBatch.values[idx], scale); + Int128 result = scaleDownInt128ByPowerOfTen(srcBatch.values[idx], scale_); if (!result.fitsInLong()) { handleOverflow(dstBatch, idx, throwOnOverflow); return; @@ -512,21 +512,21 @@ namespace orc { void convertDecimalToDouble(ReadTypeBatch& dstBatch, uint64_t idx, const FileTypeBatch& srcBatch) { double doubleValue = Int128(srcBatch.values[idx]).toDouble(); - dstBatch.data[idx] = static_cast(doubleValue) / static_cast(factor); + dstBatch.data[idx] = static_cast(doubleValue) / static_cast(factor_); } - int32_t precision; - int32_t scale; - int64_t factor; + int32_t precision_; + int32_t scale_; + int64_t factor_; }; template class DecimalToNumericColumnReader : public ConvertColumnReader { public: - DecimalToNumericColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, - bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) {} + DecimalToNumericColumnReader(const Type& readTypeValue, const Type& fileType, StripeStreams& stripe, + bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { ConvertColumnReader::next(rowBatch, numValues, notNull); @@ -544,13 +544,13 @@ namespace orc { template class DecimalConvertColumnReader : public ConvertColumnReader { public: - DecimalConvertColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, - bool _throwOnOverflow) - : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow) { - fromPrecision = 
fileType.getPrecision(); - fromScale = fileType.getScale(); - toPrecision = _readType.getPrecision(); - toScale = _readType.getScale(); + DecimalConvertColumnReader(const Type& readTypeValue, const Type& fileType, StripeStreams& stripe, + bool throwOnOverflowValue) + : ConvertColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue) { + fromPrecision_ = fileType.getPrecision(); + fromScale_ = fileType.getScale(); + toPrecision_ = readTypeValue.getPrecision(); + toScale_ = readTypeValue.getScale(); } void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { @@ -572,7 +572,7 @@ namespace orc { using ReadType = decltype(dstBatch.values[idx]); auto [overflows, resultI128] = - convertDecimal(srcBatch.values[idx], fromScale, toPrecision, toScale); + convertDecimal(srcBatch.values[idx], fromScale_, toPrecision_, toScale_); if (overflows) { handleOverflow(dstBatch, idx, throwOnOverflow); } @@ -587,20 +587,20 @@ namespace orc { } } - int32_t fromPrecision; - int32_t fromScale; - int32_t toPrecision; - int32_t toScale; + int32_t fromPrecision_; + int32_t fromScale_; + int32_t toPrecision_; + int32_t toScale_; }; template class DecimalToTimestampColumnReader : public ConvertToTimestampColumnReader { public: - DecimalToTimestampColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ConvertToTimestampColumnReader(_readType, fileType, stripe, _throwOnOverflow), - precision(static_cast(fileType.getPrecision())), - scale(static_cast(fileType.getScale())) {} + DecimalToTimestampColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertToTimestampColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue), + precision_(static_cast(fileType.getPrecision())), + scale_(static_cast(fileType.getScale())) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { 
ConvertColumnReader::next(rowBatch, numValues, notNull); @@ -626,18 +626,18 @@ namespace orc { bool overflow = false; Int128 i128(srcBatch.values[idx]); - Int128 integerPortion = scaleDownInt128ByPowerOfTen(i128, scale); + Int128 integerPortion = scaleDownInt128ByPowerOfTen(i128, scale_); if (integerPortion < MIN_EPOCH_SECONDS || integerPortion > MAX_EPOCH_SECONDS) { handleOverflow(dstBatch, idx, throwOnOverflow); return; } - i128 -= scaleUpInt128ByPowerOfTen(integerPortion, scale, overflow); + i128 -= scaleUpInt128ByPowerOfTen(integerPortion, scale_, overflow); Int128 fractionPortion = std::move(i128); - if (scale < SecondToNanoFactor) { + if (scale_ < SecondToNanoFactor) { fractionPortion = - scaleUpInt128ByPowerOfTen(fractionPortion, SecondToNanoFactor - scale, overflow); + scaleUpInt128ByPowerOfTen(fractionPortion, SecondToNanoFactor - scale_, overflow); } else { - fractionPortion = scaleDownInt128ByPowerOfTen(fractionPortion, scale - SecondToNanoFactor); + fractionPortion = scaleDownInt128ByPowerOfTen(fractionPortion, scale_ - SecondToNanoFactor); } if (fractionPortion < 0) { fractionPortion += 1e9; @@ -652,17 +652,17 @@ namespace orc { } } - const int32_t precision; - const int32_t scale; + const int32_t precision_; + const int32_t scale_; }; template class DecimalToStringVariantColumnReader : public ConvertToStringVariantColumnReader { public: - DecimalToStringVariantColumnReader(const Type& _readType, const Type& fileType, - StripeStreams& stripe, bool _throwOnOverflow) - : ConvertToStringVariantColumnReader(_readType, fileType, stripe, _throwOnOverflow), - scale(fileType.getScale()) {} + DecimalToStringVariantColumnReader(const Type& readTypeValue, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflowValue) + : ConvertToStringVariantColumnReader(readTypeValue, fileType, stripe, throwOnOverflowValue), + scale_(fileType.getScale()) {} uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) override { uint64_t size = 0; @@ 
-671,7 +671,7 @@ namespace orc { if (readType.getKind() == STRING) { for (uint64_t i = 0; i < rowBatch.numElements; ++i) { if (!rowBatch.hasNulls || rowBatch.notNull[i]) { - strBuffer[i] = Int128(srcBatch.values[i]).toDecimalString(scale, true); + strBuffer[i] = Int128(srcBatch.values[i]).toDecimalString(scale_, true); size += strBuffer[i].size(); } } @@ -679,7 +679,7 @@ namespace orc { const auto maxLength = readType.getMaximumLength(); for (uint64_t i = 0; i < rowBatch.numElements; ++i) { if (!rowBatch.hasNulls || rowBatch.notNull[i]) { - strBuffer[i] = Int128(srcBatch.values[i]).toDecimalString(scale, true); + strBuffer[i] = Int128(srcBatch.values[i]).toDecimalString(scale_, true); } if (strBuffer[i].size() > maxLength) { strBuffer[i].resize(maxLength); @@ -691,7 +691,7 @@ namespace orc { } private: - const int32_t scale; + const int32_t scale_; }; #define DEFINE_NUMERIC_CONVERT_READER(FROM, TO, TYPE) \ @@ -835,7 +835,7 @@ namespace orc { DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(Varchar) #define CREATE_READER(NAME) \ - return std::make_unique(_readType, fileType, stripe, throwOnOverflow); + return std::make_unique(readType, fileType, stripe, throwOnOverflow); #define CASE_CREATE_READER(TYPE, CONVERT) \ case TYPE: \ @@ -858,7 +858,7 @@ namespace orc { #define CASE_CREATE_DECIMAL_READER(FROM) \ case DECIMAL: { \ - if (isDecimal64(_readType)) { \ + if (isDecimal64(readType)) { \ CREATE_READER(FROM##ToDecimal64ColumnReader) \ } else { \ CREATE_READER(FROM##ToDecimal128ColumnReader) \ @@ -868,7 +868,7 @@ namespace orc { #define CASE_EXCEPTION \ default: \ throw SchemaEvolutionError("Cannot convert from " + fileType.toString() + " to " + \ - _readType.toString()); + readType.toString()); std::unique_ptr buildConvertReader(const Type& fileType, StripeStreams& stripe, bool useTightNumericVector, @@ -878,11 +878,11 @@ namespace orc { "SchemaEvolution only support tight vector, please create ColumnVectorBatch with " "option useTightNumericVector"); } - const auto& 
_readType = *stripe.getSchemaEvolution()->getReadType(fileType); + const auto& readType = *stripe.getSchemaEvolution()->getReadType(fileType); switch (fileType.getKind()) { case BOOLEAN: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BYTE, BooleanToByte) CASE_CREATE_READER(SHORT, BooleanToShort) CASE_CREATE_READER(INT, BooleanToInt) @@ -906,7 +906,7 @@ namespace orc { } } case BYTE: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, ByteToBoolean) CASE_CREATE_READER(SHORT, ByteToShort) CASE_CREATE_READER(INT, ByteToInt) @@ -930,7 +930,7 @@ namespace orc { } } case SHORT: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, ShortToBoolean) CASE_CREATE_READER(BYTE, ShortToByte) CASE_CREATE_READER(INT, ShortToInt) @@ -954,7 +954,7 @@ namespace orc { } } case INT: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, IntToBoolean) CASE_CREATE_READER(BYTE, IntToByte) CASE_CREATE_READER(SHORT, IntToShort) @@ -978,7 +978,7 @@ namespace orc { } } case LONG: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, LongToBoolean) CASE_CREATE_READER(BYTE, LongToByte) CASE_CREATE_READER(SHORT, LongToShort) @@ -1002,7 +1002,7 @@ namespace orc { } } case FLOAT: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, FloatToBoolean) CASE_CREATE_READER(BYTE, FloatToByte) CASE_CREATE_READER(SHORT, FloatToShort) @@ -1026,7 +1026,7 @@ namespace orc { } } case DOUBLE: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, DoubleToBoolean) CASE_CREATE_READER(BYTE, DoubleToByte) CASE_CREATE_READER(SHORT, DoubleToShort) @@ -1050,7 +1050,7 @@ namespace orc { } } case DECIMAL: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_FROM_DECIMAL_READER(BOOLEAN, Boolean) 
CASE_CREATE_FROM_DECIMAL_READER(BYTE, Byte) CASE_CREATE_FROM_DECIMAL_READER(SHORT, Short) @@ -1065,13 +1065,13 @@ namespace orc { CASE_CREATE_FROM_DECIMAL_READER(TIMESTAMP_INSTANT, Timestamp) case DECIMAL: { if (isDecimal64(fileType)) { - if (isDecimal64(_readType)) { + if (isDecimal64(readType)) { CREATE_READER(Decimal64ToDecimal64ColumnReader) } else { CREATE_READER(Decimal64ToDecimal128ColumnReader) } } else { - if (isDecimal64(_readType)) { + if (isDecimal64(readType)) { CREATE_READER(Decimal128ToDecimal64ColumnReader) } else { CREATE_READER(Decimal128ToDecimal128ColumnReader) diff --git a/c++/src/CpuInfoUtil.cc b/c++/src/CpuInfoUtil.cc index 7e6958deef1..82669de20a4 100644 --- a/c++/src/CpuInfoUtil.cc +++ b/c++/src/CpuInfoUtil.cc @@ -375,11 +375,11 @@ namespace orc { return flags; } - void OsRetrieveCacheSize(std::array* cache_sizes) { + void OsRetrieveCacheSize(std::array* cacheSizes) { for (int i = 0; i < kCacheLevels; ++i) { const int64_t cache_size = LinuxGetCacheSize(i); if (cache_size > 0) { - (*cache_sizes)[i] = cache_size; + (*cacheSizes)[i] = cache_size; } } } @@ -403,8 +403,8 @@ namespace orc { } // Read from /proc/cpuinfo - void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor, - std::string* model_name) { + void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor, + std::string* modelName) { std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in); while (cpuinfo) { std::string line; @@ -414,9 +414,9 @@ namespace orc { const std::string name = TrimString(line.substr(0, colon - 1)); const std::string value = TrimString(line.substr(colon + 1, std::string::npos)); if (name.compare("flags") == 0 || name.compare("Features") == 0) { - *hardware_flags |= LinuxParseCpuFlags(value); + *hardwareFlags |= LinuxParseCpuFlags(value); } else if (name.compare("model name") == 0) { - *model_name = value; + *modelName = value; } else if (name.compare("vendor_id") == 0) { if (value.compare("GenuineIntel") == 0) { *vendor = 
CpuInfo::Vendor::Intel; @@ -433,7 +433,7 @@ namespace orc { #if defined(CPUINFO_ARCH_X86) //------------------------------ X86_64 ------------------------------// - bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) { + bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t* hardwareFlags) { enum { USER_SIMD_NONE, USER_SIMD_AVX512, @@ -442,9 +442,9 @@ namespace orc { int level = USER_SIMD_MAX; // Parse the level - if (simd_level == "AVX512") { + if (simdLevel == "AVX512") { level = USER_SIMD_AVX512; - } else if (simd_level == "NONE") { + } else if (simdLevel == "NONE") { level = USER_SIMD_NONE; } else { return false; @@ -452,7 +452,7 @@ namespace orc { // Disable feature as the level if (level < USER_SIMD_AVX512) { - *hardware_flags &= ~CpuInfo::AVX512; + *hardwareFlags &= ~CpuInfo::AVX512; } return true; } diff --git a/c++/src/Exceptions.cc b/c++/src/Exceptions.cc index 23703ff3244..30ecf7dc7cc 100644 --- a/c++/src/Exceptions.cc +++ b/c++/src/Exceptions.cc @@ -20,11 +20,11 @@ namespace orc { - NotImplementedYet::NotImplementedYet(const std::string& what_arg) : logic_error(what_arg) { + NotImplementedYet::NotImplementedYet(const std::string& whatArg) : logic_error(whatArg) { // PASS } - NotImplementedYet::NotImplementedYet(const char* what_arg) : logic_error(what_arg) { + NotImplementedYet::NotImplementedYet(const char* whatArg) : logic_error(whatArg) { // PASS } @@ -36,11 +36,11 @@ namespace orc { // PASS } - ParseError::ParseError(const std::string& what_arg) : runtime_error(what_arg) { + ParseError::ParseError(const std::string& whatArg) : runtime_error(whatArg) { // PASS } - ParseError::ParseError(const char* what_arg) : runtime_error(what_arg) { + ParseError::ParseError(const char* whatArg) : runtime_error(whatArg) { // PASS } @@ -52,11 +52,11 @@ namespace orc { // PASS } - InvalidArgument::InvalidArgument(const std::string& what_arg) : runtime_error(what_arg) { + InvalidArgument::InvalidArgument(const std::string& 
whatArg) : runtime_error(whatArg) { // PASS } - InvalidArgument::InvalidArgument(const char* what_arg) : runtime_error(what_arg) { + InvalidArgument::InvalidArgument(const char* whatArg) : runtime_error(whatArg) { // PASS } @@ -68,11 +68,11 @@ namespace orc { // PASS } - SchemaEvolutionError::SchemaEvolutionError(const std::string& what_arg) : logic_error(what_arg) { + SchemaEvolutionError::SchemaEvolutionError(const std::string& whatArg) : logic_error(whatArg) { // PASS } - SchemaEvolutionError::SchemaEvolutionError(const char* what_arg) : logic_error(what_arg) { + SchemaEvolutionError::SchemaEvolutionError(const char* whatArg) : logic_error(whatArg) { // PASS } diff --git a/c++/src/Int128.cc b/c++/src/Int128.cc index 3c159f37750..4a1d0b763a3 100644 --- a/c++/src/Int128.cc +++ b/c++/src/Int128.cc @@ -35,8 +35,8 @@ namespace orc { } Int128::Int128(const std::string& str) { - lowbits = 0; - highbits = 0; + lowbits_ = 0; + highbits_ = 0; size_t length = str.length(); if (length > 0) { bool isNegative = str[0] == '-'; @@ -64,30 +64,30 @@ namespace orc { // Break the left and right numbers into 32 bit chunks // so that we can multiply them without overflow. 
- uint64_t L0 = static_cast(highbits) >> 32; - uint64_t L1 = static_cast(highbits) & INT_MASK; - uint64_t L2 = lowbits >> 32; - uint64_t L3 = lowbits & INT_MASK; - uint64_t R0 = static_cast(right.highbits) >> 32; - uint64_t R1 = static_cast(right.highbits) & INT_MASK; - uint64_t R2 = right.lowbits >> 32; - uint64_t R3 = right.lowbits & INT_MASK; + uint64_t L0 = static_cast(highbits_) >> 32; + uint64_t L1 = static_cast(highbits_) & INT_MASK; + uint64_t L2 = lowbits_ >> 32; + uint64_t L3 = lowbits_ & INT_MASK; + uint64_t R0 = static_cast(right.highbits_) >> 32; + uint64_t R1 = static_cast(right.highbits_) & INT_MASK; + uint64_t R2 = right.lowbits_ >> 32; + uint64_t R3 = right.lowbits_ & INT_MASK; uint64_t product = L3 * R3; - lowbits = product & INT_MASK; + lowbits_ = product & INT_MASK; uint64_t sum = product >> 32; product = L2 * R3; sum += product; - highbits = sum < product ? CARRY_BIT : 0; + highbits_ = sum < product ? CARRY_BIT : 0; product = L3 * R2; sum += product; if (sum < product) { - highbits += CARRY_BIT; + highbits_ += CARRY_BIT; } - lowbits += sum << 32; - highbits += static_cast(sum >> 32); - highbits += L1 * R3 + L2 * R2 + L3 * R1; - highbits += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32; + lowbits_ += sum << 32; + highbits_ += static_cast(sum >> 32); + highbits_ += L1 * R3 + L2 * R2 + L3 * R1; + highbits_ += (L0 * R3 + L1 * R2 + L2 * R1 + L3 * R0) << 32; return *this; } @@ -103,16 +103,16 @@ namespace orc { int64_t Int128::fillInArray(uint32_t* array, bool& wasNegative) const { uint64_t high; uint64_t low; - if (highbits < 0) { - low = ~lowbits + 1; - high = static_cast(~highbits); + if (highbits_ < 0) { + low = ~lowbits_ + 1; + high = static_cast(~highbits_); if (low == 0) { high += 1; } wasNegative = true; } else { - low = lowbits; - high = static_cast(highbits); + low = lowbits_; + high = static_cast(highbits_); wasNegative = false; } if (high != 0) { @@ -430,8 +430,8 @@ namespace orc { std::string Int128::toHexString() const { 
std::stringstream buf; - buf << std::hex << "0x" << std::setw(16) << std::setfill('0') << highbits << std::setw(16) - << std::setfill('0') << lowbits; + buf << std::hex << "0x" << std::setw(16) << std::setfill('0') << highbits_ << std::setw(16) + << std::setfill('0') << lowbits_; return buf.str(); } @@ -439,7 +439,7 @@ namespace orc { if (fitsInLong()) { return static_cast(toLong()); } - return static_cast(lowbits) + std::ldexp(static_cast(highbits), 64); + return static_cast(lowbits_) + std::ldexp(static_cast(highbits_), 64); } const static int32_t MAX_PRECISION_64 = 18; diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc index 8c8837aa642..ed7fee7373b 100644 --- a/c++/src/MemoryPool.cc +++ b/c++/src/MemoryPool.cc @@ -53,72 +53,72 @@ namespace orc { template DataBuffer::DataBuffer(MemoryPool& pool, uint64_t newSize) - : memoryPool(pool), buf(nullptr), currentSize(0), currentCapacity(0) { + : memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0) { reserve(newSize); - currentSize = newSize; + currentSize_ = newSize; } template DataBuffer::DataBuffer(DataBuffer&& buffer) noexcept - : memoryPool(buffer.memoryPool), - buf(buffer.buf), - currentSize(buffer.currentSize), - currentCapacity(buffer.currentCapacity) { - buffer.buf = nullptr; - buffer.currentSize = 0; - buffer.currentCapacity = 0; + : memoryPool_(buffer.memoryPool_), + buf_(buffer.buf_), + currentSize_(buffer.currentSize_), + currentCapacity_(buffer.currentCapacity_) { + buffer.buf_ = nullptr; + buffer.currentSize_ = 0; + buffer.currentCapacity_ = 0; } template DataBuffer::~DataBuffer() { - for (uint64_t i = currentSize; i > 0; --i) { - (buf + i - 1)->~T(); + for (uint64_t i = currentSize_; i > 0; --i) { + (buf_ + i - 1)->~T(); } - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (currentSize > newSize) { - for (uint64_t i = currentSize; i > 
newSize; --i) { - (buf + i - 1)->~T(); + if (currentSize_ > newSize) { + for (uint64_t i = currentSize_; i > newSize; --i) { + (buf_ + i - 1)->~T(); } - } else if (newSize > currentSize) { - for (uint64_t i = currentSize; i < newSize; ++i) { - new (buf + i) T(); + } else if (newSize > currentSize_) { + for (uint64_t i = currentSize_; i < newSize; ++i) { + new (buf_ + i) T(); } } - currentSize = newSize; + currentSize_ = newSize; } template void DataBuffer::reserve(uint64_t newCapacity) { - if (newCapacity > currentCapacity || !buf) { - if (buf) { - T* buf_old = buf; - buf = reinterpret_cast(memoryPool.malloc(sizeof(T) * newCapacity)); - memcpy(buf, buf_old, sizeof(T) * currentSize); - memoryPool.free(reinterpret_cast(buf_old)); + if (newCapacity > currentCapacity_ || !buf_) { + if (buf_) { + T* buf_old = buf_; + buf_ = reinterpret_cast(memoryPool_.malloc(sizeof(T) * newCapacity)); + memcpy(buf_, buf_old, sizeof(T) * currentSize_); + memoryPool_.free(reinterpret_cast(buf_old)); } else { - buf = reinterpret_cast(memoryPool.malloc(sizeof(T) * newCapacity)); + buf_ = reinterpret_cast(memoryPool_.malloc(sizeof(T) * newCapacity)); } - currentCapacity = newCapacity; + currentCapacity_ = newCapacity; } } template void DataBuffer::zeroOut() { - memset(buf, 0, sizeof(T) * currentCapacity); + memset(buf_, 0, sizeof(T) * currentCapacity_); } // Specializations for Int128 template <> void DataBuffer::zeroOut() { - for (uint64_t i = 0; i < currentCapacity; ++i) { - new (buf + i) Int128(); + for (uint64_t i = 0; i < currentCapacity_; ++i) { + new (buf_ + i) Int128(); } } @@ -126,180 +126,180 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, newSize - currentSize); + if (newSize > currentSize_) { + memset(buf_ + 
currentSize_, 0, newSize - currentSize_); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for char* template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(char*)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(char*)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for double template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(double)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(double)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for float template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(float)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(float)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for int64_t template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + 
currentSize, 0, (newSize - currentSize) * sizeof(int64_t)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int64_t)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for int32_t template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int32_t)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int32_t)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for int16_t template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int16_t)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int16_t)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for int8_t template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(int8_t)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int8_t)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for uint64_t template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + 
memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, (newSize - currentSize) * sizeof(uint64_t)); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(uint64_t)); } - currentSize = newSize; + currentSize_ = newSize; } // Specializations for unsigned char template <> DataBuffer::~DataBuffer() { - if (buf) { - memoryPool.free(reinterpret_cast(buf)); + if (buf_) { + memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { reserve(newSize); - if (newSize > currentSize) { - memset(buf + currentSize, 0, newSize - currentSize); + if (newSize > currentSize_) { + memset(buf_ + currentSize_, 0, newSize - currentSize_); } - currentSize = newSize; + currentSize_ = newSize; } #ifdef __clang__ diff --git a/c++/src/Options.hh b/c++/src/Options.hh index 51cd8efd64e..97ffb143b5f 100644 --- a/c++/src/Options.hh +++ b/c++/src/Options.hh @@ -52,23 +52,23 @@ namespace orc { } }; - ReaderOptions::ReaderOptions() : privateBits(std::make_unique()) { + ReaderOptions::ReaderOptions() : privateBits_(std::make_unique()) { // PASS } ReaderOptions::ReaderOptions(const ReaderOptions& rhs) - : privateBits(std::make_unique(*(rhs.privateBits.get()))) { + : privateBits_(std::make_unique(*(rhs.privateBits_.get()))) { // PASS } ReaderOptions::ReaderOptions(ReaderOptions& rhs) { - // swap privateBits with rhs - privateBits.swap(rhs.privateBits); + // swap privateBits_ with rhs + privateBits_.swap(rhs.privateBits_); } ReaderOptions& ReaderOptions::operator=(const ReaderOptions& rhs) { if (this != &rhs) { - privateBits.reset(new ReaderOptionsPrivate(*(rhs.privateBits.get()))); + privateBits_.reset(new ReaderOptionsPrivate(*(rhs.privateBits_.get()))); } return *this; } @@ -78,48 +78,48 @@ namespace orc { } ReaderOptions& ReaderOptions::setMemoryPool(MemoryPool& pool) { - 
privateBits->memoryPool = &pool; + privateBits_->memoryPool = &pool; return *this; } MemoryPool* ReaderOptions::getMemoryPool() const { - return privateBits->memoryPool; + return privateBits_->memoryPool; } ReaderOptions& ReaderOptions::setReaderMetrics(ReaderMetrics* metrics) { - privateBits->metrics = metrics; + privateBits_->metrics = metrics; return *this; } ReaderMetrics* ReaderOptions::getReaderMetrics() const { - return privateBits->metrics; + return privateBits_->metrics; } ReaderOptions& ReaderOptions::setTailLocation(uint64_t offset) { - privateBits->tailLocation = offset; + privateBits_->tailLocation = offset; return *this; } uint64_t ReaderOptions::getTailLocation() const { - return privateBits->tailLocation; + return privateBits_->tailLocation; } ReaderOptions& ReaderOptions::setSerializedFileTail(const std::string& value) { - privateBits->serializedTail = value; + privateBits_->serializedTail = value; return *this; } std::string ReaderOptions::getSerializedFileTail() const { - return privateBits->serializedTail; + return privateBits_->serializedTail; } ReaderOptions& ReaderOptions::setErrorStream(std::ostream& stream) { - privateBits->errorStream = &stream; + privateBits_->errorStream = &stream; return *this; } std::ostream* ReaderOptions::getErrorStream() const { - return privateBits->errorStream; + return privateBits_->errorStream; } /** @@ -155,23 +155,23 @@ namespace orc { } }; - RowReaderOptions::RowReaderOptions() : privateBits(std::make_unique()) { + RowReaderOptions::RowReaderOptions() : privateBits_(std::make_unique()) { // PASS } RowReaderOptions::RowReaderOptions(const RowReaderOptions& rhs) - : privateBits(std::make_unique(*(rhs.privateBits.get()))) { + : privateBits_(std::make_unique(*(rhs.privateBits_.get()))) { // PASS } RowReaderOptions::RowReaderOptions(RowReaderOptions& rhs) { - // swap privateBits with rhs - privateBits.swap(rhs.privateBits); + // swap privateBits_ with rhs + privateBits_.swap(rhs.privateBits_); } RowReaderOptions& 
RowReaderOptions::operator=(const RowReaderOptions& rhs) { if (this != &rhs) { - privateBits.reset(new RowReaderOptionsPrivate(*(rhs.privateBits.get()))); + privateBits_.reset(new RowReaderOptionsPrivate(*(rhs.privateBits_.get()))); } return *this; } @@ -181,150 +181,150 @@ namespace orc { } RowReaderOptions& RowReaderOptions::include(const std::list& include) { - privateBits->selection = ColumnSelection_FIELD_IDS; - privateBits->includedColumnIndexes.assign(include.begin(), include.end()); - privateBits->includedColumnNames.clear(); - privateBits->idReadIntentMap.clear(); + privateBits_->selection = ColumnSelection_FIELD_IDS; + privateBits_->includedColumnIndexes.assign(include.begin(), include.end()); + privateBits_->includedColumnNames.clear(); + privateBits_->idReadIntentMap.clear(); return *this; } RowReaderOptions& RowReaderOptions::include(const std::list& include) { - privateBits->selection = ColumnSelection_NAMES; - privateBits->includedColumnNames.assign(include.begin(), include.end()); - privateBits->includedColumnIndexes.clear(); - privateBits->idReadIntentMap.clear(); + privateBits_->selection = ColumnSelection_NAMES; + privateBits_->includedColumnNames.assign(include.begin(), include.end()); + privateBits_->includedColumnIndexes.clear(); + privateBits_->idReadIntentMap.clear(); return *this; } RowReaderOptions& RowReaderOptions::includeTypes(const std::list& types) { - privateBits->selection = ColumnSelection_TYPE_IDS; - privateBits->includedColumnIndexes.assign(types.begin(), types.end()); - privateBits->includedColumnNames.clear(); - privateBits->idReadIntentMap.clear(); + privateBits_->selection = ColumnSelection_TYPE_IDS; + privateBits_->includedColumnIndexes.assign(types.begin(), types.end()); + privateBits_->includedColumnNames.clear(); + privateBits_->idReadIntentMap.clear(); return *this; } RowReaderOptions& RowReaderOptions::includeTypesWithIntents( const IdReadIntentMap& idReadIntentMap) { - privateBits->selection = ColumnSelection_TYPE_IDS; 
- privateBits->includedColumnIndexes.clear(); - privateBits->idReadIntentMap.clear(); + privateBits_->selection = ColumnSelection_TYPE_IDS; + privateBits_->includedColumnIndexes.clear(); + privateBits_->idReadIntentMap.clear(); for (const auto& typeIntentPair : idReadIntentMap) { - privateBits->idReadIntentMap[typeIntentPair.first] = typeIntentPair.second; - privateBits->includedColumnIndexes.push_back(typeIntentPair.first); + privateBits_->idReadIntentMap[typeIntentPair.first] = typeIntentPair.second; + privateBits_->includedColumnIndexes.push_back(typeIntentPair.first); } - privateBits->includedColumnNames.clear(); + privateBits_->includedColumnNames.clear(); return *this; } RowReaderOptions& RowReaderOptions::range(uint64_t offset, uint64_t length) { - privateBits->dataStart = offset; - privateBits->dataLength = length; + privateBits_->dataStart = offset; + privateBits_->dataLength = length; return *this; } bool RowReaderOptions::getIndexesSet() const { - return privateBits->selection == ColumnSelection_FIELD_IDS; + return privateBits_->selection == ColumnSelection_FIELD_IDS; } bool RowReaderOptions::getTypeIdsSet() const { - return privateBits->selection == ColumnSelection_TYPE_IDS; + return privateBits_->selection == ColumnSelection_TYPE_IDS; } const std::list& RowReaderOptions::getInclude() const { - return privateBits->includedColumnIndexes; + return privateBits_->includedColumnIndexes; } bool RowReaderOptions::getNamesSet() const { - return privateBits->selection == ColumnSelection_NAMES; + return privateBits_->selection == ColumnSelection_NAMES; } const std::list& RowReaderOptions::getIncludeNames() const { - return privateBits->includedColumnNames; + return privateBits_->includedColumnNames; } uint64_t RowReaderOptions::getOffset() const { - return privateBits->dataStart; + return privateBits_->dataStart; } uint64_t RowReaderOptions::getLength() const { - return privateBits->dataLength; + return privateBits_->dataLength; } RowReaderOptions& 
RowReaderOptions::throwOnHive11DecimalOverflow(bool shouldThrow) { - privateBits->throwOnHive11DecimalOverflow = shouldThrow; + privateBits_->throwOnHive11DecimalOverflow = shouldThrow; return *this; } bool RowReaderOptions::getThrowOnHive11DecimalOverflow() const { - return privateBits->throwOnHive11DecimalOverflow; + return privateBits_->throwOnHive11DecimalOverflow; } RowReaderOptions& RowReaderOptions::throwOnSchemaEvolutionOverflow(bool shouldThrow) { - privateBits->throwOnSchemaEvolutionOverflow = shouldThrow; + privateBits_->throwOnSchemaEvolutionOverflow = shouldThrow; return *this; } bool RowReaderOptions::getThrowOnSchemaEvolutionOverflow() const { - return privateBits->throwOnSchemaEvolutionOverflow; + return privateBits_->throwOnSchemaEvolutionOverflow; } RowReaderOptions& RowReaderOptions::forcedScaleOnHive11Decimal(int32_t forcedScale) { - privateBits->forcedScaleOnHive11Decimal = forcedScale; + privateBits_->forcedScaleOnHive11Decimal = forcedScale; return *this; } int32_t RowReaderOptions::getForcedScaleOnHive11Decimal() const { - return privateBits->forcedScaleOnHive11Decimal; + return privateBits_->forcedScaleOnHive11Decimal; } bool RowReaderOptions::getEnableLazyDecoding() const { - return privateBits->enableLazyDecoding; + return privateBits_->enableLazyDecoding; } RowReaderOptions& RowReaderOptions::setEnableLazyDecoding(bool enable) { - privateBits->enableLazyDecoding = enable; + privateBits_->enableLazyDecoding = enable; return *this; } RowReaderOptions& RowReaderOptions::searchArgument(std::unique_ptr sargs) { - privateBits->sargs = std::move(sargs); + privateBits_->sargs = std::move(sargs); return *this; } std::shared_ptr RowReaderOptions::getSearchArgument() const { - return privateBits->sargs; + return privateBits_->sargs; } RowReaderOptions& RowReaderOptions::setTimezoneName(const std::string& zoneName) { - privateBits->readerTimezone = zoneName; + privateBits_->readerTimezone = zoneName; return *this; } const std::string& 
RowReaderOptions::getTimezoneName() const { - return privateBits->readerTimezone; + return privateBits_->readerTimezone; } const RowReaderOptions::IdReadIntentMap RowReaderOptions::getIdReadIntentMap() const { - return privateBits->idReadIntentMap; + return privateBits_->idReadIntentMap; } RowReaderOptions& RowReaderOptions::setUseTightNumericVector(bool useTightNumericVector) { - privateBits->useTightNumericVector = useTightNumericVector; + privateBits_->useTightNumericVector = useTightNumericVector; return *this; } bool RowReaderOptions::getUseTightNumericVector() const { - return privateBits->useTightNumericVector; + return privateBits_->useTightNumericVector; } RowReaderOptions& RowReaderOptions::setReadType(std::shared_ptr type) { - privateBits->readType = std::move(type); + privateBits_->readType = std::move(type); return *this; } std::shared_ptr& RowReaderOptions::getReadType() const { - return privateBits->readType; + return privateBits_->readType; } } // namespace orc diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc index d4b6a86e2f0..8899299d3d8 100644 --- a/c++/src/OrcFile.cc +++ b/c++/src/OrcFile.cc @@ -49,29 +49,29 @@ namespace orc { class FileInputStream : public InputStream { private: - std::string filename; - int file; - uint64_t totalLength; - ReaderMetrics* metrics; + std::string filename_; + int file_; + uint64_t totalLength_; + ReaderMetrics* metrics_; public: - FileInputStream(std::string _filename, ReaderMetrics* _metrics) - : filename(_filename), metrics(_metrics) { - file = open(filename.c_str(), O_BINARY | O_RDONLY); - if (file == -1) { - throw ParseError("Can't open " + filename); + FileInputStream(std::string filename, ReaderMetrics* metrics) + : filename_(filename), metrics_(metrics) { + file_ = open(filename_.c_str(), O_BINARY | O_RDONLY); + if (file_ == -1) { + throw ParseError("Can't open " + filename_); } struct stat fileStat; - if (fstat(file, &fileStat) == -1) { - throw ParseError("Can't stat " + filename); + if (fstat(file_, 
&fileStat) == -1) { + throw ParseError("Can't stat " + filename_); } - totalLength = static_cast(fileStat.st_size); + totalLength_ = static_cast(fileStat.st_size); } ~FileInputStream() override; uint64_t getLength() const override { - return totalLength; + return totalLength_; } uint64_t getNaturalReadSize() const override { @@ -83,23 +83,23 @@ namespace orc { if (!buf) { throw ParseError("Buffer is null"); } - ssize_t bytesRead = pread(file, buf, length, static_cast(offset)); + ssize_t bytesRead = pread(file_, buf, length, static_cast(offset)); if (bytesRead == -1) { - throw ParseError("Bad read of " + filename); + throw ParseError("Bad read of " + filename_); } if (static_cast(bytesRead) != length) { - throw ParseError("Short read of " + filename); + throw ParseError("Short read of " + filename_); } } const std::string& getName() const override { - return filename; + return filename_; } }; FileInputStream::~FileInputStream() { - close(file); + close(file_); } std::unique_ptr readFile(const std::string& path, ReaderMetrics* metrics) { @@ -126,26 +126,26 @@ namespace orc { class FileOutputStream : public OutputStream { private: - std::string filename; - int file; - uint64_t bytesWritten; - bool closed; + std::string filename_; + int file_; + uint64_t bytesWritten_; + bool closed_; public: - FileOutputStream(std::string _filename) { - bytesWritten = 0; - filename = _filename; - closed = false; - file = open(filename.c_str(), O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR); - if (file == -1) { - throw ParseError("Can't open " + filename); + FileOutputStream(std::string filename) { + bytesWritten_ = 0; + filename_ = filename; + closed_ = false; + file_ = open(filename_.c_str(), O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR); + if (file_ == -1) { + throw ParseError("Can't open " + filename_); } } ~FileOutputStream() override; uint64_t getLength() const override { - return bytesWritten; + return bytesWritten_; } uint64_t getNaturalWriteSize() 
const override { @@ -153,41 +153,41 @@ namespace orc { } void write(const void* buf, size_t length) override { - if (closed) { + if (closed_) { throw std::logic_error("Cannot write to closed stream."); } - ssize_t bytesWrite = ::write(file, buf, length); + ssize_t bytesWrite = ::write(file_, buf, length); if (bytesWrite == -1) { - throw ParseError("Bad write of " + filename); + throw ParseError("Bad write of " + filename_); } if (static_cast(bytesWrite) != length) { - throw ParseError("Short write of " + filename); + throw ParseError("Short write of " + filename_); } - bytesWritten += static_cast(bytesWrite); + bytesWritten_ += static_cast(bytesWrite); } const std::string& getName() const override { - return filename; + return filename_; } void close() override { - if (!closed) { - ::close(file); - closed = true; + if (!closed_) { + ::close(file_); + closed_ = true; } } void flush() override { - if (!closed) { - ::fsync(file); + if (!closed_) { + ::fsync(file_); } } }; FileOutputStream::~FileOutputStream() { - if (!closed) { - ::close(file); - closed = true; + if (!closed_) { + ::close(file_); + closed_ = true; } } diff --git a/c++/src/RLE.hh b/c++/src/RLE.hh index 51f9b6f58a8..12c7c5e6987 100644 --- a/c++/src/RLE.hh +++ b/c++/src/RLE.hh @@ -105,7 +105,7 @@ namespace orc { // must be non-inline! 
virtual ~RleDecoder(); - RleDecoder(ReaderMetrics* _metrics) : metrics(_metrics) { + RleDecoder(ReaderMetrics* metricsValue) : metrics(metricsValue) { // pass } diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc index b221e8b8aa8..5d6f6006690 100644 --- a/c++/src/RLEv1.cc +++ b/c++/src/RLEv1.cc @@ -38,9 +38,9 @@ namespace orc { RleEncoderV1::RleEncoderV1(std::unique_ptr outStream, bool hasSigned) : RleEncoder(std::move(outStream), hasSigned) { literals = new int64_t[MAX_LITERAL_SIZE]; - delta = 0; - repeat = false; - tailRunLength = 0; + delta_ = 0; + repeat_ = false; + tailRunLength_ = 0; } RleEncoderV1::~RleEncoderV1() { @@ -49,9 +49,9 @@ namespace orc { void RleEncoderV1::writeValues() { if (numLiterals != 0) { - if (repeat) { + if (repeat_) { writeByte(static_cast(static_cast(numLiterals) - MINIMUM_REPEAT)); - writeByte(static_cast(delta)); + writeByte(static_cast(delta_)); if (isSigned) { writeVslong(literals[0]); } else { @@ -67,9 +67,9 @@ namespace orc { } } } - repeat = false; + repeat_ = false; numLiterals = 0; - tailRunLength = 0; + tailRunLength_ = 0; } } @@ -84,9 +84,9 @@ namespace orc { void RleEncoderV1::write(int64_t value) { if (numLiterals == 0) { literals[numLiterals++] = value; - tailRunLength = 1; - } else if (repeat) { - if (value == literals[0] + delta * static_cast(numLiterals)) { + tailRunLength_ = 1; + } else if (repeat_) { + if (value == literals[0] + delta_ * static_cast(numLiterals)) { numLiterals += 1; if (numLiterals == MAXIMUM_REPEAT) { writeValues(); @@ -94,36 +94,36 @@ namespace orc { } else { writeValues(); literals[numLiterals++] = value; - tailRunLength = 1; + tailRunLength_ = 1; } } else { - if (tailRunLength == 1) { - delta = value - literals[numLiterals - 1]; - if (delta < MIN_DELTA || delta > MAX_DELTA) { - tailRunLength = 1; + if (tailRunLength_ == 1) { + delta_ = value - literals[numLiterals - 1]; + if (delta_ < MIN_DELTA || delta_ > MAX_DELTA) { + tailRunLength_ = 1; } else { - tailRunLength = 2; + tailRunLength_ = 2; } - } 
else if (value == literals[numLiterals - 1] + delta) { - tailRunLength += 1; + } else if (value == literals[numLiterals - 1] + delta_) { + tailRunLength_ += 1; } else { - delta = value - literals[numLiterals - 1]; - if (delta < MIN_DELTA || delta > MAX_DELTA) { - tailRunLength = 1; + delta_ = value - literals[numLiterals - 1]; + if (delta_ < MIN_DELTA || delta_ > MAX_DELTA) { + tailRunLength_ = 1; } else { - tailRunLength = 2; + tailRunLength_ = 2; } } - if (tailRunLength == MINIMUM_REPEAT) { + if (tailRunLength_ == MINIMUM_REPEAT) { if (numLiterals + 1 == MINIMUM_REPEAT) { - repeat = true; + repeat_ = true; numLiterals += 1; } else { numLiterals -= static_cast(MINIMUM_REPEAT - 1); int64_t base = literals[numLiterals]; writeValues(); literals[0] = base; - repeat = true; + repeat_ = true; numLiterals = MINIMUM_REPEAT; } } else { @@ -137,16 +137,16 @@ namespace orc { signed char RleDecoderV1::readByte() { SCOPED_MINUS_STOPWATCH(metrics, DecodingLatencyUs); - if (bufferStart == bufferEnd) { + if (bufferStart_ == bufferEnd_) { int bufferLength; const void* bufferPointer; - if (!inputStream->Next(&bufferPointer, &bufferLength)) { + if (!inputStream_->Next(&bufferPointer, &bufferLength)) { throw ParseError("bad read in readByte"); } - bufferStart = static_cast(bufferPointer); - bufferEnd = bufferStart + bufferLength; + bufferStart_ = static_cast(bufferPointer); + bufferEnd_ = bufferStart_ + bufferLength; } - return static_cast(*(bufferStart++)); + return static_cast(*(bufferStart_++)); } uint64_t RleDecoderV1::readLong() { @@ -177,34 +177,34 @@ namespace orc { void RleDecoderV1::readHeader() { signed char ch = readByte(); if (ch < 0) { - remainingValues = static_cast(-ch); - repeating = false; + remainingValues_ = static_cast(-ch); + repeating_ = false; } else { - remainingValues = static_cast(ch) + MINIMUM_REPEAT; - repeating = true; - delta = readByte(); - value = isSigned ? 
unZigZag(readLong()) : static_cast(readLong()); + remainingValues_ = static_cast(ch) + MINIMUM_REPEAT; + repeating_ = true; + delta_ = readByte(); + value_ = isSigned_ ? unZigZag(readLong()) : static_cast(readLong()); } } void RleDecoderV1::reset() { - remainingValues = 0; - value = 0; - bufferStart = nullptr; - bufferEnd = nullptr; - delta = 0; - repeating = false; + remainingValues_ = 0; + value_ = 0; + bufferStart_ = nullptr; + bufferEnd_ = nullptr; + delta_ = 0; + repeating_ = false; } RleDecoderV1::RleDecoderV1(std::unique_ptr input, bool hasSigned, - ReaderMetrics* _metrics) - : RleDecoder(_metrics), inputStream(std::move(input)), isSigned(hasSigned) { + ReaderMetrics* metrics) + : RleDecoder(metrics), inputStream_(std::move(input)), isSigned_(hasSigned) { reset(); } void RleDecoderV1::seek(PositionProvider& location) { // move the input stream - inputStream->seek(location); + inputStream_->seek(location); // reset the decoder status and lazily call readHeader() reset(); // skip ahead the given number of records @@ -213,14 +213,14 @@ namespace orc { void RleDecoderV1::skip(uint64_t numValues) { while (numValues > 0) { - if (remainingValues == 0) { + if (remainingValues_ == 0) { readHeader(); } - uint64_t count = std::min(numValues, remainingValues); - remainingValues -= count; + uint64_t count = std::min(numValues, remainingValues_); + remainingValues_ -= count; numValues -= count; - if (repeating) { - value += delta * static_cast(count); + if (repeating_) { + value_ += delta_ * static_cast(count); } else { skipLongs(count); } @@ -240,38 +240,38 @@ namespace orc { } while (position < numValues) { // If we are out of values, read more. - if (remainingValues == 0) { + if (remainingValues_ == 0) { readHeader(); } // How many do we read out of this block? 
- uint64_t count = std::min(numValues - position, remainingValues); + uint64_t count = std::min(numValues - position, remainingValues_); uint64_t consumed = 0; - if (repeating) { + if (repeating_) { if (notNull) { for (uint64_t i = 0; i < count; ++i) { if (notNull[position + i]) { - data[position + i] = static_cast(value + static_cast(consumed) * delta); + data[position + i] = static_cast(value_ + static_cast(consumed) * delta_); consumed += 1; } } } else { for (uint64_t i = 0; i < count; ++i) { - data[position + i] = static_cast(value + static_cast(i) * delta); + data[position + i] = static_cast(value_ + static_cast(i) * delta_); } consumed = count; } - value += static_cast(consumed) * delta; + value_ += static_cast(consumed) * delta_; } else { if (notNull) { for (uint64_t i = 0; i < count; ++i) { if (notNull[position + i]) { data[position + i] = - isSigned ? static_cast(unZigZag(readLong())) : static_cast(readLong()); + isSigned_ ? static_cast(unZigZag(readLong())) : static_cast(readLong()); ++consumed; } } } else { - if (isSigned) { + if (isSigned_) { for (uint64_t i = 0; i < count; ++i) { data[position + i] = static_cast(unZigZag(readLong())); } @@ -283,7 +283,7 @@ namespace orc { consumed = count; } } - remainingValues -= consumed; + remainingValues_ -= consumed; position += count; // skipNulls() diff --git a/c++/src/RLEv1.hh b/c++/src/RLEv1.hh index fbe6b0f9c69..a2a00c93052 100644 --- a/c++/src/RLEv1.hh +++ b/c++/src/RLEv1.hh @@ -39,9 +39,9 @@ namespace orc { void write(int64_t val) override; private: - int64_t delta; - bool repeat; - uint64_t tailRunLength; + int64_t delta_; + bool repeat_; + uint64_t tailRunLength_; void writeValues(); }; @@ -83,14 +83,14 @@ namespace orc { inline void reset(); - const std::unique_ptr inputStream; - const bool isSigned; - uint64_t remainingValues; - int64_t value; - const char* bufferStart; - const char* bufferEnd; - int64_t delta; - bool repeating; + const std::unique_ptr inputStream_; + const bool isSigned_; + uint64_t 
remainingValues_; + int64_t value_; + const char* bufferStart_; + const char* bufferEnd_; + int64_t delta_; + bool repeating_; }; } // namespace orc diff --git a/c++/src/RLEv2.hh b/c++/src/RLEv2.hh index 1cee59d0a61..a8e0340e7e7 100644 --- a/c++/src/RLEv2.hh +++ b/c++/src/RLEv2.hh @@ -96,10 +96,10 @@ namespace orc { ~RleEncoderV2() override { delete[] literals; - delete[] gapVsPatchList; - delete[] zigzagLiterals; - delete[] baseRedLiterals; - delete[] adjDeltas; + delete[] gapVsPatchList_; + delete[] zigzagLiterals_; + delete[] baseRedLiterals_; + delete[] adjDeltas_; } /** * Flushing underlying BufferedOutputStream @@ -109,18 +109,18 @@ namespace orc { void write(int64_t val) override; private: - const bool alignedBitPacking; - uint32_t fixedRunLength; - uint32_t variableRunLength; - int64_t prevDelta; - int32_t histgram[HIST_LEN]; + const bool alignedBitPacking_; + uint32_t fixedRunLength_; + uint32_t variableRunLength_; + int64_t prevDelta_; + int32_t histgram_[HIST_LEN]; // The four list below should actually belong to EncodingOption since it only holds temporal // values in write(int64_t val), it is move here for performance consideration. 
- int64_t* gapVsPatchList; - int64_t* zigzagLiterals; - int64_t* baseRedLiterals; - int64_t* adjDeltas; + int64_t* gapVsPatchList_; + int64_t* zigzagLiterals_; + int64_t* baseRedLiterals_; + int64_t* adjDeltas_; uint32_t getOpCode(EncodingType encoding); int64_t* prepareForDirectOrPatchedBase(EncodingOption& option); @@ -169,39 +169,39 @@ namespace orc { unsigned char readByte(); void setBufStart(const char* start) { - bufferStart = const_cast(start); + bufferStart_ = const_cast(start); } char* getBufStart() { - return bufferStart; + return bufferStart_; } void setBufEnd(const char* end) { - bufferEnd = const_cast(end); + bufferEnd_ = const_cast(end); } char* getBufEnd() { - return bufferEnd; + return bufferEnd_; } uint64_t bufLength() { - return bufferEnd - bufferStart; + return bufferEnd_ - bufferStart_; } void setBitsLeft(const uint32_t bits) { - bitsLeft = bits; + bitsLeft_ = bits; } void setCurByte(const uint32_t byte) { - curByte = byte; + curByte_ = byte; } uint32_t getBitsLeft() { - return bitsLeft; + return bitsLeft_; } uint32_t getCurByte() { - return curByte; + return curByte_; } /** @@ -225,8 +225,8 @@ namespace orc { int64_t* resPatch, uint64_t* patchIdx); void resetReadLongs() { - bitsLeft = 0; - curByte = 0; + bitsLeft_ = 0; + curByte_ = 0; } void resetRun() { @@ -249,17 +249,17 @@ namespace orc { template uint64_t copyDataFromBuffer(T* data, uint64_t offset, uint64_t numValues, const char* notNull); - const std::unique_ptr inputStream; - const bool isSigned; - unsigned char firstByte; - char* bufferStart; - char* bufferEnd; - uint64_t runLength; // Length of the current run - uint64_t runRead; // Number of returned values of the current run - uint32_t bitsLeft; // Used by readLongs when bitSize < 8 - uint32_t curByte; // Used by anything that uses readLongs - DataBuffer unpackedPatch; // Used by PATCHED_BASE - DataBuffer literals; // Values of the current run + const std::unique_ptr inputStream_; + const bool isSigned_; + unsigned char firstByte_; + 
char* bufferStart_; + char* bufferEnd_; + uint64_t runLength_; // Length of the current run + uint64_t runRead_; // Number of returned values of the current run + uint32_t bitsLeft_; // Used by readLongs when bitSize < 8 + uint32_t curByte_; // Used by anything that uses readLongs + DataBuffer unpackedPatch_; // Used by PATCHED_BASE + DataBuffer literals_; // Values of the current run }; inline void RleDecoderV2::resetBufferStart(uint64_t len, bool resetBuf, uint32_t backupByteLen) { @@ -268,20 +268,20 @@ namespace orc { const void* bufferPointer = nullptr; if (backupByteLen != 0) { - inputStream->BackUp(backupByteLen); + inputStream_->BackUp(backupByteLen); } if (len >= remainingLen && resetBuf) { - if (!inputStream->Next(&bufferPointer, &bufferLength)) { + if (!inputStream_->Next(&bufferPointer, &bufferLength)) { throw ParseError("bad read in RleDecoderV2::resetBufferStart"); } } if (bufferPointer == nullptr) { - bufferStart += len; + bufferStart_ += len; } else { - bufferStart = const_cast(static_cast(bufferPointer)); - bufferEnd = bufferStart + bufferLength; + bufferStart_ = const_cast(static_cast(bufferPointer)); + bufferEnd_ = bufferStart_ + bufferLength; } } } // namespace orc diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 4e02f171a1e..a5a225a1b90 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -73,11 +73,11 @@ namespace orc { } std::string ColumnSelector::toDotColumnPath() { - if (columns.empty()) { + if (columns_.empty()) { return std::string(); } std::ostringstream columnStream; - std::copy(columns.begin(), columns.end(), + std::copy(columns_.begin(), columns_.end(), std::ostream_iterator(columnStream, ".")); std::string columnPath = columnStream.str(); return columnPath.substr(0, columnPath.length() - 1); @@ -150,15 +150,15 @@ namespace orc { */ void ColumnSelector::buildTypeNameIdMap(const Type* type) { // map - idTypeMap[type->getColumnId()] = type; + idTypeMap_[type->getColumnId()] = type; if (STRUCT == type->getKind()) { for 
(size_t i = 0; i < type->getSubtypeCount(); ++i) { const std::string& fieldName = type->getFieldName(i); - columns.push_back(fieldName); - nameIdMap[toDotColumnPath()] = type->getSubtype(i)->getColumnId(); + columns_.push_back(fieldName); + nameIdMap_[toDotColumnPath()] = type->getSubtype(i)->getColumnId(); buildTypeNameIdMap(type->getSubtype(i)); - columns.pop_back(); + columns_.pop_back(); } } else { // other non-primitive type @@ -170,13 +170,13 @@ namespace orc { void ColumnSelector::updateSelected(std::vector& selectedColumns, const RowReaderOptions& options) { - selectedColumns.assign(static_cast(contents->footer->types_size()), false); - if (contents->schema->getKind() == STRUCT && options.getIndexesSet()) { + selectedColumns.assign(static_cast(contents_->footer->types_size()), false); + if (contents_->schema->getKind() == STRUCT && options.getIndexesSet()) { for (std::list::const_iterator field = options.getInclude().begin(); field != options.getInclude().end(); ++field) { updateSelectedByFieldId(selectedColumns, *field); } - } else if (contents->schema->getKind() == STRUCT && options.getNamesSet()) { + } else if (contents_->schema->getKind() == STRUCT && options.getNamesSet()) { for (std::list::const_iterator field = options.getIncludeNames().begin(); field != options.getIncludeNames().end(); ++field) { updateSelectedByName(selectedColumns, *field); @@ -188,21 +188,21 @@ namespace orc { updateSelectedByTypeId(selectedColumns, *typeId, idReadIntentMap); } } else { // default is to select all columns std::fill(selectedColumns.begin(), selectedColumns.end(), true); } - selectParents(selectedColumns, *contents->schema.get()); + selectParents(selectedColumns, *contents_->schema.get()); selectedColumns[0] = true; // column 0 is selected by default } void ColumnSelector::updateSelectedByFieldId(std::vector& selectedColumns, uint64_t fieldId) { - if (fieldId < contents->schema->getSubtypeCount()) { - 
selectChildren(selectedColumns, *contents->schema->getSubtype(fieldId)); + if (fieldId < contents_->schema->getSubtypeCount()) { + selectChildren(selectedColumns, *contents_->schema->getSubtype(fieldId)); } else { std::stringstream buffer; buffer << "Invalid column selected " << fieldId << " out of " - << contents->schema->getSubtypeCount(); + << contents_->schema->getSubtypeCount(); throw ParseError(buffer.str()); } } @@ -215,7 +215,7 @@ namespace orc { std::vector& selectedColumns, uint64_t typeId, const RowReaderOptions::IdReadIntentMap& idReadIntentMap) { if (typeId < selectedColumns.size()) { - const Type& type = *idTypeMap[typeId]; + const Type& type = *idTypeMap_[typeId]; selectChildren(selectedColumns, type, idReadIntentMap); } else { std::stringstream buffer; @@ -226,14 +226,14 @@ namespace orc { void ColumnSelector::updateSelectedByName(std::vector& selectedColumns, const std::string& fieldName) { - std::map::const_iterator ite = nameIdMap.find(fieldName); - if (ite != nameIdMap.end()) { + std::map::const_iterator ite = nameIdMap_.find(fieldName); + if (ite != nameIdMap_.end()) { updateSelectedByTypeId(selectedColumns, ite->second); } else { bool first = true; std::ostringstream ss; ss << "Invalid column selected " << fieldName << ". 
Valid names are "; - for (auto it = nameIdMap.begin(); it != nameIdMap.end(); ++it) { + for (auto it = nameIdMap_.begin(); it != nameIdMap_.end(); ++it) { if (!first) ss << ", "; ss << it->first; first = false; @@ -242,89 +242,89 @@ namespace orc { } } - ColumnSelector::ColumnSelector(const FileContents* _contents) : contents(_contents) { - buildTypeNameIdMap(contents->schema.get()); + ColumnSelector::ColumnSelector(const FileContents* contents) : contents_(contents) { + buildTypeNameIdMap(contents_->schema.get()); } - RowReaderImpl::RowReaderImpl(std::shared_ptr _contents, + RowReaderImpl::RowReaderImpl(std::shared_ptr contents, const RowReaderOptions& opts) - : localTimezone(getLocalTimezone()), - contents(_contents), - throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()), - forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()), - footer(contents->footer.get()), - firstRowOfStripe(*contents->pool, 0), - enableEncodedBlock(opts.getEnableLazyDecoding()), - readerTimezone(getTimezoneByName(opts.getTimezoneName())), - schemaEvolution(opts.getReadType(), contents->schema.get()) { + : localTimezone_(getLocalTimezone()), + contents_(contents), + throwOnHive11DecimalOverflow_(opts.getThrowOnHive11DecimalOverflow()), + forcedScaleOnHive11Decimal_(opts.getForcedScaleOnHive11Decimal()), + footer_(contents_->footer.get()), + firstRowOfStripe_(*contents_->pool, 0), + enableEncodedBlock_(opts.getEnableLazyDecoding()), + readerTimezone_(getTimezoneByName(opts.getTimezoneName())), + schemaEvolution_(opts.getReadType(), contents_->schema.get()) { uint64_t numberOfStripes; - numberOfStripes = static_cast(footer->stripes_size()); - currentStripe = numberOfStripes; - lastStripe = 0; - currentRowInStripe = 0; - rowsInCurrentStripe = 0; - numRowGroupsInStripeRange = 0; - useTightNumericVector = opts.getUseTightNumericVector(); - throwOnSchemaEvolutionOverflow = opts.getThrowOnSchemaEvolutionOverflow(); + numberOfStripes = 
static_cast(footer_->stripes_size()); + currentStripe_ = numberOfStripes; + lastStripe_ = 0; + currentRowInStripe_ = 0; + rowsInCurrentStripe_ = 0; + numRowGroupsInStripeRange_ = 0; + useTightNumericVector_ = opts.getUseTightNumericVector(); + throwOnSchemaEvolutionOverflow_ = opts.getThrowOnSchemaEvolutionOverflow(); uint64_t rowTotal = 0; - firstRowOfStripe.resize(numberOfStripes); + firstRowOfStripe_.resize(numberOfStripes); for (size_t i = 0; i < numberOfStripes; ++i) { - firstRowOfStripe[i] = rowTotal; - proto::StripeInformation stripeInfo = footer->stripes(static_cast(i)); + firstRowOfStripe_[i] = rowTotal; + proto::StripeInformation stripeInfo = footer_->stripes(static_cast(i)); rowTotal += stripeInfo.number_of_rows(); bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() && stripeInfo.offset() < opts.getOffset() + opts.getLength(); if (isStripeInRange) { - if (i < currentStripe) { - currentStripe = i; + if (i < currentStripe_) { + currentStripe_ = i; } - if (i >= lastStripe) { - lastStripe = i + 1; + if (i >= lastStripe_) { + lastStripe_ = i + 1; } - if (footer->row_index_stride() > 0) { - numRowGroupsInStripeRange += - (stripeInfo.number_of_rows() + footer->row_index_stride() - 1) / - footer->row_index_stride(); + if (footer_->row_index_stride() > 0) { + numRowGroupsInStripeRange_ += + (stripeInfo.number_of_rows() + footer_->row_index_stride() - 1) / + footer_->row_index_stride(); } } } - firstStripe = currentStripe; - processingStripe = lastStripe; + firstStripe_ = currentStripe_; + processingStripe_ = lastStripe_; - if (currentStripe == 0) { - previousRow = (std::numeric_limits::max)(); - } else if (currentStripe == numberOfStripes) { - previousRow = footer->number_of_rows(); + if (currentStripe_ == 0) { + previousRow_ = (std::numeric_limits::max)(); + } else if (currentStripe_ == numberOfStripes) { + previousRow_ = footer_->number_of_rows(); } else { - previousRow = firstRowOfStripe[firstStripe] - 1; + previousRow_ = 
firstRowOfStripe_[firstStripe_] - 1; } - ColumnSelector column_selector(contents.get()); - column_selector.updateSelected(selectedColumns, opts); + ColumnSelector column_selector(contents_.get()); + column_selector.updateSelected(selectedColumns_, opts); // prepare SargsApplier if SearchArgument is available - if (opts.getSearchArgument() && footer->row_index_stride() > 0) { - sargs = opts.getSearchArgument(); - sargsApplier.reset( - new SargsApplier(*contents->schema, sargs.get(), footer->row_index_stride(), - getWriterVersionImpl(_contents.get()), contents->readerMetrics)); + if (opts.getSearchArgument() && footer_->row_index_stride() > 0) { + sargs_ = opts.getSearchArgument(); + sargsApplier_.reset( + new SargsApplier(*contents_->schema, sargs_.get(), footer_->row_index_stride(), + getWriterVersionImpl(contents_.get()), contents_->readerMetrics)); } - skipBloomFilters = hasBadBloomFilters(); + skipBloomFilters_ = hasBadBloomFilters(); } // Check if the file has inconsistent bloom filters. bool RowReaderImpl::hasBadBloomFilters() { // Only C++ writer in old releases could have bad bloom filters. - if (footer->writer() != ORC_CPP_WRITER) return false; + if (footer_->writer() != ORC_CPP_WRITER) return false; // 'softwareVersion' is added in 1.5.13, 1.6.11, and 1.7.0. // 1.6.x releases before 1.6.11 won't have it. On the other side, the C++ writer // supports writing bloom filters since 1.6.0. So files written by the C++ writer // and with 'softwareVersion' unset would have bad bloom filters. - if (!footer->has_software_version()) return true; + if (!footer_->has_software_version()) return true; - const std::string& fullVersion = footer->software_version(); + const std::string& fullVersion = footer_->software_version(); std::string version; // Deal with snapshot versions, e.g. 1.6.12-SNAPSHOT. 
if (fullVersion.find('-') != std::string::npos) { @@ -341,31 +341,31 @@ namespace orc { } CompressionKind RowReaderImpl::getCompression() const { - return contents->compression; + return contents_->compression; } uint64_t RowReaderImpl::getCompressionSize() const { - return contents->blockSize; + return contents_->blockSize; } const std::vector RowReaderImpl::getSelectedColumns() const { - return selectedColumns; + return selectedColumns_; } const Type& RowReaderImpl::getSelectedType() const { - if (selectedSchema.get() == nullptr) { - selectedSchema = buildSelectedType(contents->schema.get(), selectedColumns); + if (selectedSchema_.get() == nullptr) { + selectedSchema_ = buildSelectedType(contents_->schema.get(), selectedColumns_); } - return *(selectedSchema.get()); + return *(selectedSchema_.get()); } uint64_t RowReaderImpl::getRowNumber() const { - return previousRow; + return previousRow_; } void RowReaderImpl::seekToRow(uint64_t rowNumber) { // Empty file - if (lastStripe == 0) { + if (lastStripe_ == 0) { return; } @@ -375,53 +375,53 @@ namespace orc { // Implement this by setting previousRow to the number of rows in the file. 
// seeking past lastStripe - uint64_t num_stripes = static_cast(footer->stripes_size()); - if ((lastStripe == num_stripes && rowNumber >= footer->number_of_rows()) || - (lastStripe < num_stripes && rowNumber >= firstRowOfStripe[lastStripe])) { - currentStripe = num_stripes; - previousRow = footer->number_of_rows(); + uint64_t num_stripes = static_cast(footer_->stripes_size()); + if ((lastStripe_ == num_stripes && rowNumber >= footer_->number_of_rows()) || + (lastStripe_ < num_stripes && rowNumber >= firstRowOfStripe_[lastStripe_])) { + currentStripe_ = num_stripes; + previousRow_ = footer_->number_of_rows(); return; } uint64_t seekToStripe = 0; - while (seekToStripe + 1 < lastStripe && firstRowOfStripe[seekToStripe + 1] <= rowNumber) { + while (seekToStripe + 1 < lastStripe_ && firstRowOfStripe_[seekToStripe + 1] <= rowNumber) { seekToStripe++; } // seeking before the first stripe - if (seekToStripe < firstStripe) { - currentStripe = num_stripes; - previousRow = footer->number_of_rows(); + if (seekToStripe < firstStripe_) { + currentStripe_ = num_stripes; + previousRow_ = footer_->number_of_rows(); return; } - previousRow = rowNumber; - auto rowIndexStride = footer->row_index_stride(); - if (!isCurrentStripeInited() || currentStripe != seekToStripe || rowIndexStride == 0 || - currentStripeInfo.index_length() == 0) { + previousRow_ = rowNumber; + auto rowIndexStride = footer_->row_index_stride(); + if (!isCurrentStripeInited() || currentStripe_ != seekToStripe || rowIndexStride == 0 || + currentStripeInfo_.index_length() == 0) { // current stripe is not initialized or // target stripe is not current stripe or // current stripe doesn't have row indexes - currentStripe = seekToStripe; - currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe]; + currentStripe_ = seekToStripe; + currentRowInStripe_ = rowNumber - firstRowOfStripe_[currentStripe_]; startNextStripe(); - if (currentStripe >= lastStripe) { + if (currentStripe_ >= lastStripe_) { return; } } else { - 
currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe]; - if (sargsApplier) { + currentRowInStripe_ = rowNumber - firstRowOfStripe_[currentStripe_]; + if (sargsApplier_) { // advance to selected row group if predicate pushdown is enabled - currentRowInStripe = - advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, - footer->row_index_stride(), sargsApplier->getNextSkippedRows()); + currentRowInStripe_ = + advanceToNextRowGroup(currentRowInStripe_, rowsInCurrentStripe_, + footer_->row_index_stride(), sargsApplier_->getNextSkippedRows()); } } - uint64_t rowsToSkip = currentRowInStripe; + uint64_t rowsToSkip = currentRowInStripe_; // seek to the target row group if row indexes exists - if (rowIndexStride > 0 && currentStripeInfo.index_length() > 0) { - if (rowIndexes.empty()) { + if (rowIndexStride > 0 && currentStripeInfo_.index_length() > 0) { + if (rowIndexes_.empty()) { loadStripeIndex(); } // TODO(ORC-1175): process the failures of loadStripeIndex() call @@ -432,36 +432,36 @@ namespace orc { // 'reader' is reset in startNextStripe(). It could be nullptr if 'rowsToSkip' is 0, // e.g. when startNextStripe() skips all remaining rows of the file. 
if (rowsToSkip > 0) { - reader->skip(rowsToSkip); + reader_->skip(rowsToSkip); } } void RowReaderImpl::loadStripeIndex() { // reset all previous row indexes - rowIndexes.clear(); - bloomFilterIndex.clear(); + rowIndexes_.clear(); + bloomFilterIndex_.clear(); - // obtain row indexes for selected columns - uint64_t offset = currentStripeInfo.offset(); - for (int i = 0; i < currentStripeFooter.streams_size(); ++i) { - const proto::Stream& pbStream = currentStripeFooter.streams(i); + // obtain row indexes for selected columns_ + uint64_t offset = currentStripeInfo_.offset(); + for (int i = 0; i < currentStripeFooter_.streams_size(); ++i) { + const proto::Stream& pbStream = currentStripeFooter_.streams(i); uint64_t colId = pbStream.column(); - if (selectedColumns[colId] && pbStream.has_kind() && + if (selectedColumns_[colId] && pbStream.has_kind() && (pbStream.kind() == proto::Stream_Kind_ROW_INDEX || pbStream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8)) { std::unique_ptr inStream = createDecompressor( getCompression(), std::unique_ptr(new SeekableFileInputStream( - contents->stream.get(), offset, pbStream.length(), *contents->pool)), - getCompressionSize(), *contents->pool, contents->readerMetrics); + contents_->stream.get(), offset, pbStream.length(), *contents_->pool)), + getCompressionSize(), *contents_->pool, contents_->readerMetrics); if (pbStream.kind() == proto::Stream_Kind_ROW_INDEX) { proto::RowIndex rowIndex; if (!rowIndex.ParseFromZeroCopyStream(inStream.get())) { throw ParseError("Failed to parse the row index"); } - rowIndexes[colId] = rowIndex; - } else if (!skipBloomFilters) { // Stream_Kind_BLOOM_FILTER_UTF8 + rowIndexes_[colId] = rowIndex; + } else if (!skipBloomFilters_) { // Stream_Kind_BLOOM_FILTER_UTF8 proto::BloomFilterIndex pbBFIndex; if (!pbBFIndex.ParseFromZeroCopyStream(inStream.get())) { throw ParseError("Failed to parse bloom filter index"); @@ -469,11 +469,11 @@ namespace orc { BloomFilterIndex bfIndex; for (int j = 0; j < 
pbBFIndex.bloom_filter_size(); j++) { bfIndex.entries.push_back(BloomFilterUTF8Utils::deserialize( - pbStream.kind(), currentStripeFooter.columns(static_cast(pbStream.column())), + pbStream.kind(), currentStripeFooter_.columns(static_cast(pbStream.column())), pbBFIndex.bloom_filter(j))); } // add bloom filters to result for one column - bloomFilterIndex[pbStream.column()] = bfIndex; + bloomFilterIndex_[pbStream.column()] = bfIndex; } } offset += pbStream.length(); @@ -481,12 +481,12 @@ namespace orc { } void RowReaderImpl::seekToRowGroup(uint32_t rowGroupEntryId) { - // store positions for selected columns + // store positions for selected columns_ std::list> positions; // store position providers for selected colimns std::unordered_map positionProviders; - for (auto rowIndex = rowIndexes.cbegin(); rowIndex != rowIndexes.cend(); ++rowIndex) { + for (auto rowIndex = rowIndexes_.cbegin(); rowIndex != rowIndexes_.cend(); ++rowIndex) { uint64_t colId = rowIndex->first; const proto::RowIndexEntry& entry = rowIndex->second.entry(static_cast(rowGroupEntryId)); @@ -500,23 +500,23 @@ namespace orc { positionProviders.insert(std::make_pair(colId, PositionProvider(position))); } - reader->seekToRowGroup(positionProviders); + reader_->seekToRowGroup(positionProviders); } const FileContents& RowReaderImpl::getFileContents() const { - return *contents; + return *contents_; } bool RowReaderImpl::getThrowOnHive11DecimalOverflow() const { - return throwOnHive11DecimalOverflow; + return throwOnHive11DecimalOverflow_; } bool RowReaderImpl::getIsDecimalAsLong() const { - return contents->isDecimalAsLong; + return contents_->isDecimalAsLong; } int32_t RowReaderImpl::getForcedScaleOnHive11Decimal() const { - return forcedScaleOnHive11Decimal; + return forcedScaleOnHive11Decimal_; } proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, @@ -542,17 +542,17 @@ namespace orc { return result; } - ReaderImpl::ReaderImpl(std::shared_ptr _contents, const ReaderOptions& opts, 
- uint64_t _fileLength, uint64_t _postscriptLength) - : contents(std::move(_contents)), - options(opts), - fileLength(_fileLength), - postscriptLength(_postscriptLength), - footer(contents->footer.get()) { - isMetadataLoaded = false; + ReaderImpl::ReaderImpl(std::shared_ptr contents, const ReaderOptions& opts, + uint64_t fileLength, uint64_t postscriptLength) + : contents_(std::move(contents)), + options_(opts), + fileLength_(fileLength), + postscriptLength_(postscriptLength), + footer_(contents->footer.get()) { + isMetadataLoaded_ = false; checkOrcVersion(); - numberOfStripes = static_cast(footer->stripes_size()); - contents->schema = convertType(footer->types(0), *footer); + numberOfStripes_ = static_cast(footer_->stripes_size()); + contents->schema = convertType(footer_->types(0), *footer_); contents->blockSize = getCompressionBlockSize(*contents->postscript); contents->compression = convertCompressionKind(*contents->postscript); } @@ -560,11 +560,11 @@ namespace orc { std::string ReaderImpl::getSerializedFileTail() const { proto::FileTail tail; proto::PostScript* mutable_ps = tail.mutable_postscript(); - mutable_ps->CopyFrom(*contents->postscript); + mutable_ps->CopyFrom(*contents_->postscript); proto::Footer* mutableFooter = tail.mutable_footer(); - mutableFooter->CopyFrom(*footer); - tail.set_file_length(fileLength); - tail.set_postscript_length(postscriptLength); + mutableFooter->CopyFrom(*footer_); + tail.set_file_length(fileLength_); + tail.set_postscript_length(postscriptLength_); std::string result; if (!tail.SerializeToString(&result)) { throw ParseError("Failed to serialize file tail"); @@ -573,56 +573,56 @@ namespace orc { } const ReaderOptions& ReaderImpl::getReaderOptions() const { - return options; + return options_; } CompressionKind ReaderImpl::getCompression() const { - return contents->compression; + return contents_->compression; } uint64_t ReaderImpl::getCompressionSize() const { - return contents->blockSize; + return contents_->blockSize; } 
uint64_t ReaderImpl::getNumberOfStripes() const { - return numberOfStripes; + return numberOfStripes_; } uint64_t ReaderImpl::getNumberOfStripeStatistics() const { - if (!isMetadataLoaded) { + if (!isMetadataLoaded_) { readMetadata(); } - return contents->metadata == nullptr + return contents_->metadata == nullptr ? 0 - : static_cast(contents->metadata->stripe_stats_size()); + : static_cast(contents_->metadata->stripe_stats_size()); } std::unique_ptr ReaderImpl::getStripe(uint64_t stripeIndex) const { if (stripeIndex > getNumberOfStripes()) { throw std::logic_error("stripe index out of range"); } - proto::StripeInformation stripeInfo = footer->stripes(static_cast(stripeIndex)); + proto::StripeInformation stripeInfo = footer_->stripes(static_cast(stripeIndex)); return std::unique_ptr(new StripeInformationImpl( stripeInfo.offset(), stripeInfo.index_length(), stripeInfo.data_length(), - stripeInfo.footer_length(), stripeInfo.number_of_rows(), contents->stream.get(), - *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics)); + stripeInfo.footer_length(), stripeInfo.number_of_rows(), contents_->stream.get(), + *contents_->pool, contents_->compression, contents_->blockSize, contents_->readerMetrics)); } FileVersion ReaderImpl::getFormatVersion() const { - if (contents->postscript->version_size() != 2) { + if (contents_->postscript->version_size() != 2) { return FileVersion::v_0_11(); } - return {contents->postscript->version(0), contents->postscript->version(1)}; + return {contents_->postscript->version(0), contents_->postscript->version(1)}; } uint64_t ReaderImpl::getNumberOfRows() const { - return footer->number_of_rows(); + return footer_->number_of_rows(); } WriterId ReaderImpl::getWriterId() const { - if (footer->has_writer()) { - uint32_t id = footer->writer(); + if (footer_->has_writer()) { + uint32_t id = footer_->writer(); if (id > WriterId::CUDF_WRITER) { return WriterId::UNKNOWN_WRITER; } else { @@ -633,8 +633,8 @@ namespace orc 
{ } uint32_t ReaderImpl::getWriterIdValue() const { - if (footer->has_writer()) { - return footer->writer(); + if (footer_->has_writer()) { + return footer_->writer(); } else { return WriterId::ORC_JAVA_WRITER; } @@ -643,56 +643,56 @@ namespace orc { std::string ReaderImpl::getSoftwareVersion() const { std::ostringstream buffer; buffer << writerIdToString(getWriterIdValue()); - if (footer->has_software_version()) { - buffer << " " << footer->software_version(); + if (footer_->has_software_version()) { + buffer << " " << footer_->software_version(); } return buffer.str(); } WriterVersion ReaderImpl::getWriterVersion() const { - return getWriterVersionImpl(contents.get()); + return getWriterVersionImpl(contents_.get()); } uint64_t ReaderImpl::getContentLength() const { - return footer->content_length(); + return footer_->content_length(); } uint64_t ReaderImpl::getStripeStatisticsLength() const { - return contents->postscript->metadata_length(); + return contents_->postscript->metadata_length(); } uint64_t ReaderImpl::getFileFooterLength() const { - return contents->postscript->footer_length(); + return contents_->postscript->footer_length(); } uint64_t ReaderImpl::getFilePostscriptLength() const { - return postscriptLength; + return postscriptLength_; } uint64_t ReaderImpl::getFileLength() const { - return fileLength; + return fileLength_; } uint64_t ReaderImpl::getRowIndexStride() const { - return footer->row_index_stride(); + return footer_->row_index_stride(); } const std::string& ReaderImpl::getStreamName() const { - return contents->stream->getName(); + return contents_->stream->getName(); } std::list ReaderImpl::getMetadataKeys() const { std::list result; - for (int i = 0; i < footer->metadata_size(); ++i) { - result.push_back(footer->metadata(i).name()); + for (int i = 0; i < footer_->metadata_size(); ++i) { + result.push_back(footer_->metadata(i).name()); } return result; } std::string ReaderImpl::getMetadataValue(const std::string& key) const { - for (int i 
= 0; i < footer->metadata_size(); ++i) { - if (footer->metadata(i).name() == key) { - return footer->metadata(i).value(); + for (int i = 0; i < footer_->metadata_size(); ++i) { + if (footer_->metadata(i).name() == key) { + return footer_->metadata(i).value(); } } throw std::range_error("key not found"); @@ -719,10 +719,10 @@ namespace orc { throw ParseError(msg.str()); } std::unique_ptr pbStream = - createDecompressor(contents->compression, + createDecompressor(contents_->compression, std::unique_ptr(new SeekableFileInputStream( - contents->stream.get(), offset, length, *contents->pool)), - contents->blockSize, *(contents->pool), contents->readerMetrics); + contents_->stream.get(), offset, length, *contents_->pool)), + contents_->blockSize, *(contents_->pool), contents_->readerMetrics); proto::RowIndex rowIndex; if (!rowIndex.ParseFromZeroCopyStream(pbStream.get())) { @@ -740,8 +740,8 @@ namespace orc { } bool ReaderImpl::hasMetadataValue(const std::string& key) const { - for (int i = 0; i < footer->metadata_size(); ++i) { - if (footer->metadata(i).name() == key) { + for (int i = 0; i < footer_->metadata_size(); ++i) { + if (footer_->metadata(i).name() == key) { return true; } } @@ -749,22 +749,22 @@ namespace orc { } const Type& ReaderImpl::getType() const { - return *(contents->schema.get()); + return *(contents_->schema.get()); } std::unique_ptr ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const { - if (!isMetadataLoaded) { + if (!isMetadataLoaded_) { readMetadata(); } - if (contents->metadata == nullptr) { + if (contents_->metadata == nullptr) { throw std::logic_error("No stripe statistics in file"); } size_t num_cols = static_cast( - contents->metadata->stripe_stats(static_cast(stripeIndex)).col_stats_size()); + contents_->metadata->stripe_stats(static_cast(stripeIndex)).col_stats_size()); std::vector> indexStats(num_cols); - proto::StripeInformation currentStripeInfo = footer->stripes(static_cast(stripeIndex)); - proto::StripeFooter 
currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get()); + proto::StripeInformation currentStripeInfo = footer_->stripes(static_cast(stripeIndex)); + proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents_.get()); getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats); @@ -773,47 +773,47 @@ namespace orc { : getLocalTimezone(); StatContext statContext(hasCorrectStatistics(), &writerTZ); return std::make_unique( - contents->metadata->stripe_stats(static_cast(stripeIndex)), indexStats, statContext); + contents_->metadata->stripe_stats(static_cast(stripeIndex)), indexStats, statContext); } std::unique_ptr ReaderImpl::getStatistics() const { StatContext statContext(hasCorrectStatistics()); - return std::make_unique(*footer, statContext); + return std::make_unique(*footer_, statContext); } std::unique_ptr ReaderImpl::getColumnStatistics(uint32_t index) const { - if (index >= static_cast(footer->statistics_size())) { + if (index >= static_cast(footer_->statistics_size())) { throw std::logic_error("column index out of range"); } - proto::ColumnStatistics col = footer->statistics(static_cast(index)); + proto::ColumnStatistics col = footer_->statistics(static_cast(index)); StatContext statContext(hasCorrectStatistics()); return std::unique_ptr(convertColumnStatistics(col, statContext)); } void ReaderImpl::readMetadata() const { - uint64_t metadataSize = contents->postscript->metadata_length(); - uint64_t footerLength = contents->postscript->footer_length(); - if (fileLength < metadataSize + footerLength + postscriptLength + 1) { + uint64_t metadataSize = contents_->postscript->metadata_length(); + uint64_t footerLength = contents_->postscript->footer_length(); + if (fileLength_ < metadataSize + footerLength + postscriptLength_ + 1) { std::stringstream msg; - msg << "Invalid Metadata length: fileLength=" << fileLength + msg << "Invalid Metadata length: fileLength=" << fileLength_ << ", 
metadataLength=" << metadataSize << ", footerLength=" << footerLength - << ", postscriptLength=" << postscriptLength; + << ", postscriptLength=" << postscriptLength_; throw ParseError(msg.str()); } - uint64_t metadataStart = fileLength - metadataSize - footerLength - postscriptLength - 1; + uint64_t metadataStart = fileLength_ - metadataSize - footerLength - postscriptLength_ - 1; if (metadataSize != 0) { std::unique_ptr pbStream = createDecompressor( - contents->compression, - std::make_unique(contents->stream.get(), metadataStart, - metadataSize, *contents->pool), - contents->blockSize, *contents->pool, contents->readerMetrics); - contents->metadata.reset(new proto::Metadata()); - if (!contents->metadata->ParseFromZeroCopyStream(pbStream.get())) { + contents_->compression, + std::make_unique(contents_->stream.get(), metadataStart, + metadataSize, *contents_->pool), + contents_->blockSize, *contents_->pool, contents_->readerMetrics); + contents_->metadata.reset(new proto::Metadata()); + if (!contents_->metadata->ParseFromZeroCopyStream(pbStream.get())) { throw ParseError("Failed to parse the metadata"); } } - isMetadataLoaded = true; + isMetadataLoaded_ = true; } bool ReaderImpl::hasCorrectStatistics() const { @@ -823,7 +823,7 @@ namespace orc { void ReaderImpl::checkOrcVersion() { FileVersion version = getFormatVersion(); if (version != FileVersion(0, 11) && version != FileVersion(0, 12)) { - *(options.getErrorStream()) << "Warning: ORC file " << contents->stream->getName() + *(options_.getErrorStream()) << "Warning: ORC file " << contents_->stream->getName() << " was written in an unknown format version " << version.toString() << "\n"; } @@ -835,11 +835,11 @@ namespace orc { } std::unique_ptr ReaderImpl::createRowReader(const RowReaderOptions& opts) const { - if (opts.getSearchArgument() && !isMetadataLoaded) { + if (opts.getSearchArgument() && !isMetadataLoaded_) { // load stripe statistics for PPD readMetadata(); } - return std::make_unique(contents, opts); + 
return std::make_unique(contents_, opts); } uint64_t maxStreamsForType(const proto::Type& type) { @@ -874,60 +874,60 @@ namespace orc { uint64_t ReaderImpl::getMemoryUse(int stripeIx) { std::vector selectedColumns; - selectedColumns.assign(static_cast(contents->footer->types_size()), true); + selectedColumns.assign(static_cast(contents_->footer->types_size()), true); return getMemoryUse(stripeIx, selectedColumns); } uint64_t ReaderImpl::getMemoryUseByFieldId(const std::list& include, int stripeIx) { std::vector selectedColumns; - selectedColumns.assign(static_cast(contents->footer->types_size()), false); - ColumnSelector column_selector(contents.get()); - if (contents->schema->getKind() == STRUCT && include.begin() != include.end()) { + selectedColumns.assign(static_cast(contents_->footer->types_size()), false); + ColumnSelector column_selector(contents_.get()); + if (contents_->schema->getKind() == STRUCT && include.begin() != include.end()) { for (std::list::const_iterator field = include.begin(); field != include.end(); ++field) { column_selector.updateSelectedByFieldId(selectedColumns, *field); } } else { - // default is to select all columns + // default is to select all columns_ std::fill(selectedColumns.begin(), selectedColumns.end(), true); } - column_selector.selectParents(selectedColumns, *contents->schema.get()); + column_selector.selectParents(selectedColumns, *contents_->schema.get()); selectedColumns[0] = true; // column 0 is selected by default return getMemoryUse(stripeIx, selectedColumns); } uint64_t ReaderImpl::getMemoryUseByName(const std::list& names, int stripeIx) { std::vector selectedColumns; - selectedColumns.assign(static_cast(contents->footer->types_size()), false); - ColumnSelector column_selector(contents.get()); - if (contents->schema->getKind() == STRUCT && names.begin() != names.end()) { + selectedColumns.assign(static_cast(contents_->footer->types_size()), false); + ColumnSelector column_selector(contents_.get()); + if 
(contents_->schema->getKind() == STRUCT && names.begin() != names.end()) { for (std::list::const_iterator field = names.begin(); field != names.end(); ++field) { column_selector.updateSelectedByName(selectedColumns, *field); } } else { - // default is to select all columns + // default is to select all columns_ std::fill(selectedColumns.begin(), selectedColumns.end(), true); } - column_selector.selectParents(selectedColumns, *contents->schema.get()); + column_selector.selectParents(selectedColumns, *contents_->schema.get()); selectedColumns[0] = true; // column 0 is selected by default return getMemoryUse(stripeIx, selectedColumns); } uint64_t ReaderImpl::getMemoryUseByTypeId(const std::list& include, int stripeIx) { std::vector selectedColumns; - selectedColumns.assign(static_cast(contents->footer->types_size()), false); - ColumnSelector column_selector(contents.get()); + selectedColumns.assign(static_cast(contents_->footer->types_size()), false); + ColumnSelector column_selector(contents_.get()); if (include.begin() != include.end()) { for (std::list::const_iterator field = include.begin(); field != include.end(); ++field) { column_selector.updateSelectedByTypeId(selectedColumns, *field); } } else { - // default is to select all columns + // default is to select all columns_ std::fill(selectedColumns.begin(), selectedColumns.end(), true); } - column_selector.selectParents(selectedColumns, *contents->schema.get()); + column_selector.selectParents(selectedColumns, *contents_->schema.get()); selectedColumns[0] = true; // column 0 is selected by default return getMemoryUse(stripeIx, selectedColumns); } @@ -935,14 +935,14 @@ namespace orc { uint64_t ReaderImpl::getMemoryUse(int stripeIx, std::vector& selectedColumns) { uint64_t maxDataLength = 0; - if (stripeIx >= 0 && stripeIx < footer->stripes_size()) { - uint64_t stripe = footer->stripes(stripeIx).data_length(); + if (stripeIx >= 0 && stripeIx < footer_->stripes_size()) { + uint64_t stripe = 
footer_->stripes(stripeIx).data_length(); if (maxDataLength < stripe) { maxDataLength = stripe; } } else { - for (int i = 0; i < footer->stripes_size(); i++) { - uint64_t stripe = footer->stripes(i).data_length(); + for (int i = 0; i < footer_->stripes_size(); i++) { + uint64_t stripe = footer_->stripes(i).data_length(); if (maxDataLength < stripe) { maxDataLength = stripe; } @@ -951,9 +951,9 @@ namespace orc { bool hasStringColumn = false; uint64_t nSelectedStreams = 0; - for (int i = 0; !hasStringColumn && i < footer->types_size(); i++) { + for (int i = 0; !hasStringColumn && i < footer_->types_size(); i++) { if (selectedColumns[static_cast(i)]) { - const proto::Type& type = footer->types(i); + const proto::Type& type = footer_->types(i); nSelectedStreams += maxStreamsForType(type); switch (static_cast(type.kind())) { case proto::Type_Kind_CHAR: @@ -979,29 +979,29 @@ namespace orc { uint64_t memory = hasStringColumn ? 2 * maxDataLength : std::min(uint64_t(maxDataLength), - nSelectedStreams * contents->stream->getNaturalReadSize()); + nSelectedStreams * contents_->stream->getNaturalReadSize()); // Do we need even more memory to read the footer or the metadata? - if (memory < contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS) { - memory = contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS; + if (memory < contents_->postscript->footer_length() + DIRECTORY_SIZE_GUESS) { + memory = contents_->postscript->footer_length() + DIRECTORY_SIZE_GUESS; } - if (memory < contents->postscript->metadata_length()) { - memory = contents->postscript->metadata_length(); + if (memory < contents_->postscript->metadata_length()) { + memory = contents_->postscript->metadata_length(); } // Account for firstRowOfStripe. 
- memory += static_cast(footer->stripes_size()) * sizeof(uint64_t); + memory += static_cast(footer_->stripes_size()) * sizeof(uint64_t); // Decompressors need buffers for each stream uint64_t decompressorMemory = 0; - if (contents->compression != CompressionKind_NONE) { - for (int i = 0; i < footer->types_size(); i++) { + if (contents_->compression != CompressionKind_NONE) { + for (int i = 0; i < footer_->types_size(); i++) { if (selectedColumns[static_cast(i)]) { - const proto::Type& type = footer->types(i); - decompressorMemory += maxStreamsForType(type) * contents->blockSize; + const proto::Type& type = footer_->types(i); + decompressorMemory += maxStreamsForType(type) * contents_->blockSize; } } - if (contents->compression == CompressionKind_SNAPPY) { + if (contents_->compression == CompressionKind_SNAPPY) { decompressorMemory *= 2; // Snappy decompressor uses a second buffer } } @@ -1011,69 +1011,69 @@ namespace orc { // Update fields to indicate we've reached the end of file void RowReaderImpl::markEndOfFile() { - currentStripe = lastStripe; - currentRowInStripe = 0; - rowsInCurrentStripe = 0; - if (lastStripe == 0) { + currentStripe_ = lastStripe_; + currentRowInStripe_ = 0; + rowsInCurrentStripe_ = 0; + if (lastStripe_ == 0) { // Empty file - previousRow = 0; + previousRow_ = 0; } else { - previousRow = firstRowOfStripe[lastStripe - 1] + - footer->stripes(static_cast(lastStripe - 1)).number_of_rows(); + previousRow_ = firstRowOfStripe_[lastStripe_ - 1] + + footer_->stripes(static_cast(lastStripe_ - 1)).number_of_rows(); } } void RowReaderImpl::startNextStripe() { - reader.reset(); // ColumnReaders use lots of memory; free old memory first - rowIndexes.clear(); - bloomFilterIndex.clear(); + reader_.reset(); // ColumnReaders use lots of memory; free old memory first + rowIndexes_.clear(); + bloomFilterIndex_.clear(); // evaluate file statistics if it exists - if (sargsApplier && !sargsApplier->evaluateFileStatistics(*footer, numRowGroupsInStripeRange)) { + if 
(sargsApplier_ && !sargsApplier_->evaluateFileStatistics(*footer_, numRowGroupsInStripeRange_)) { // skip the entire file markEndOfFile(); return; } do { - currentStripeInfo = footer->stripes(static_cast(currentStripe)); - uint64_t fileLength = contents->stream->getLength(); - if (currentStripeInfo.offset() + currentStripeInfo.index_length() + - currentStripeInfo.data_length() + currentStripeInfo.footer_length() >= + currentStripeInfo_ = footer_->stripes(static_cast(currentStripe_)); + uint64_t fileLength = contents_->stream->getLength(); + if (currentStripeInfo_.offset() + currentStripeInfo_.index_length() + + currentStripeInfo_.data_length() + currentStripeInfo_.footer_length() >= fileLength) { std::stringstream msg; - msg << "Malformed StripeInformation at stripe index " << currentStripe + msg << "Malformed StripeInformation at stripe index " << currentStripe_ << ": fileLength=" << fileLength - << ", StripeInfo=(offset=" << currentStripeInfo.offset() - << ", indexLength=" << currentStripeInfo.index_length() - << ", dataLength=" << currentStripeInfo.data_length() - << ", footerLength=" << currentStripeInfo.footer_length() << ")"; + << ", StripeInfo=(offset=" << currentStripeInfo_.offset() + << ", indexLength=" << currentStripeInfo_.index_length() + << ", dataLength=" << currentStripeInfo_.data_length() + << ", footerLength=" << currentStripeInfo_.footer_length() << ")"; throw ParseError(msg.str()); } - rowsInCurrentStripe = currentStripeInfo.number_of_rows(); - processingStripe = currentStripe; + rowsInCurrentStripe_ = currentStripeInfo_.number_of_rows(); + processingStripe_ = currentStripe_; bool isStripeNeeded = true; // If PPD enabled and stripe stats existed, evaulate it first - if (sargsApplier && contents->metadata) { + if (sargsApplier_ && contents_->metadata) { const auto& currentStripeStats = - contents->metadata->stripe_stats(static_cast(currentStripe)); + contents_->metadata->stripe_stats(static_cast(currentStripe_)); // skip this stripe after stats 
fail to satisfy sargs uint64_t stripeRowGroupCount = - (rowsInCurrentStripe + footer->row_index_stride() - 1) / footer->row_index_stride(); + (rowsInCurrentStripe_ + footer_->row_index_stride() - 1) / footer_->row_index_stride(); isStripeNeeded = - sargsApplier->evaluateStripeStatistics(currentStripeStats, stripeRowGroupCount); + sargsApplier_->evaluateStripeStatistics(currentStripeStats, stripeRowGroupCount); } if (isStripeNeeded) { - currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get()); - if (sargsApplier) { + currentStripeFooter_ = getStripeFooter(currentStripeInfo_, *contents_.get()); + if (sargsApplier_) { // read row group statistics and bloom filters of current stripe loadStripeIndex(); // select row groups to read in the current stripe - sargsApplier->pickRowGroups(rowsInCurrentStripe, rowIndexes, bloomFilterIndex); - if (sargsApplier->hasSelectedFrom(currentRowInStripe)) { + sargsApplier_->pickRowGroups(rowsInCurrentStripe_, rowIndexes_, bloomFilterIndex_); + if (sargsApplier_->hasSelectedFrom(currentRowInStripe_)) { // current stripe has at least one row group matching the predicate break; } @@ -1083,31 +1083,31 @@ namespace orc { if (!isStripeNeeded) { // advance to next stripe when current stripe has no matching rows - currentStripe += 1; - currentRowInStripe = 0; + currentStripe_ += 1; + currentRowInStripe_ = 0; } - } while (sargsApplier && currentStripe < lastStripe); + } while (sargsApplier_ && currentStripe_ < lastStripe_); - if (currentStripe < lastStripe) { + if (currentStripe_ < lastStripe_) { // get writer timezone info from stripe footer to help understand timestamp values. const Timezone& writerTimezone = - currentStripeFooter.has_writer_timezone() - ? 
getTimezoneByName(currentStripeFooter.writer_timezone()) - : localTimezone; - StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo, currentStripeFooter, - currentStripeInfo.offset(), *contents->stream, writerTimezone, - readerTimezone); - reader = buildReader(*contents->schema, stripeStreams, useTightNumericVector, - throwOnSchemaEvolutionOverflow, /*convertToReadType=*/true); - - if (sargsApplier) { + currentStripeFooter_.has_writer_timezone() + ? getTimezoneByName(currentStripeFooter_.writer_timezone()) + : localTimezone_; + StripeStreamsImpl stripeStreams(*this, currentStripe_, currentStripeInfo_, currentStripeFooter_, + currentStripeInfo_.offset(), *contents_->stream, writerTimezone, + readerTimezone_); + reader_ = buildReader(*contents_->schema, stripeStreams, useTightNumericVector_, + throwOnSchemaEvolutionOverflow_, /*convertToReadType=*/true); + + if (sargsApplier_) { // move to the 1st selected row group when PPD is enabled. - currentRowInStripe = - advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, - footer->row_index_stride(), sargsApplier->getNextSkippedRows()); - previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe - 1; - if (currentRowInStripe > 0) { - seekToRowGroup(static_cast(currentRowInStripe / footer->row_index_stride())); + currentRowInStripe_ = + advanceToNextRowGroup(currentRowInStripe_, rowsInCurrentStripe_, + footer_->row_index_stride(), sargsApplier_->getNextSkippedRows()); + previousRow_ = firstRowOfStripe_[currentStripe_] + currentRowInStripe_ - 1; + if (currentRowInStripe_ > 0) { + seekToRowGroup(static_cast(currentRowInStripe_ / footer_->row_index_stride())); } } } else { @@ -1117,52 +1117,52 @@ namespace orc { } bool RowReaderImpl::next(ColumnVectorBatch& data) { - SCOPED_STOPWATCH(contents->readerMetrics, ReaderInclusiveLatencyUs, ReaderCall); - if (currentStripe >= lastStripe) { + SCOPED_STOPWATCH(contents_->readerMetrics, ReaderInclusiveLatencyUs, ReaderCall); + if (currentStripe_ >= 
lastStripe_) { data.numElements = 0; markEndOfFile(); return false; } - if (currentRowInStripe == 0) { + if (currentRowInStripe_ == 0) { startNextStripe(); } uint64_t rowsToRead = - std::min(static_cast(data.capacity), rowsInCurrentStripe - currentRowInStripe); - if (sargsApplier && rowsToRead > 0) { - rowsToRead = computeBatchSize(rowsToRead, currentRowInStripe, rowsInCurrentStripe, - footer->row_index_stride(), sargsApplier->getNextSkippedRows()); + std::min(static_cast(data.capacity), rowsInCurrentStripe_ - currentRowInStripe_); + if (sargsApplier_ && rowsToRead > 0) { + rowsToRead = computeBatchSize(rowsToRead, currentRowInStripe_, rowsInCurrentStripe_, + footer_->row_index_stride(), sargsApplier_->getNextSkippedRows()); } data.numElements = rowsToRead; if (rowsToRead == 0) { markEndOfFile(); return false; } - if (enableEncodedBlock) { - reader->nextEncoded(data, rowsToRead, nullptr); + if (enableEncodedBlock_) { + reader_->nextEncoded(data, rowsToRead, nullptr); } else { - reader->next(data, rowsToRead, nullptr); + reader_->next(data, rowsToRead, nullptr); } // update row number - previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe; - currentRowInStripe += rowsToRead; + previousRow_ = firstRowOfStripe_[currentStripe_] + currentRowInStripe_; + currentRowInStripe_ += rowsToRead; // check if we need to advance to next selected row group - if (sargsApplier) { + if (sargsApplier_) { uint64_t nextRowToRead = - advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, footer->row_index_stride(), - sargsApplier->getNextSkippedRows()); - if (currentRowInStripe != nextRowToRead) { + advanceToNextRowGroup(currentRowInStripe_, rowsInCurrentStripe_, footer_->row_index_stride(), + sargsApplier_->getNextSkippedRows()); + if (currentRowInStripe_ != nextRowToRead) { // it is guaranteed to be at start of a row group - currentRowInStripe = nextRowToRead; - if (currentRowInStripe < rowsInCurrentStripe) { - seekToRowGroup(static_cast(currentRowInStripe / 
footer->row_index_stride())); + currentRowInStripe_ = nextRowToRead; + if (currentRowInStripe_ < rowsInCurrentStripe_) { + seekToRowGroup(static_cast(currentRowInStripe_ / footer_->row_index_stride())); } } } - if (currentRowInStripe >= rowsInCurrentStripe) { - currentStripe += 1; - currentRowInStripe = 0; + if (currentRowInStripe_ >= rowsInCurrentStripe_) { + currentStripe_ += 1; + currentRowInStripe_ = 0; } return rowsToRead != 0; } @@ -1221,9 +1221,9 @@ namespace orc { std::unique_ptr RowReaderImpl::createRowBatch(uint64_t capacity) const { // If the read type is specified, then check that the selected schema matches the read type // on the first call to createRowBatch. - if (schemaEvolution.getReadType() && selectedSchema.get() == nullptr) { + if (schemaEvolution_.getReadType() && selectedSchema_.get() == nullptr) { auto fileSchema = &getSelectedType(); - auto readType = schemaEvolution.getReadType(); + auto readType = schemaEvolution_.getReadType(); std::set readColumns, fileColumns; getColumnIds(readType, readColumns); getColumnIds(fileSchema, fileColumns); @@ -1235,9 +1235,9 @@ namespace orc { } } const Type& readType = - schemaEvolution.getReadType() ? *schemaEvolution.getReadType() : getSelectedType(); - return readType.createRowBatch(capacity, *contents->pool, enableEncodedBlock, - useTightNumericVector); + schemaEvolution_.getReadType() ? 
*schemaEvolution_.getReadType() : getSelectedType(); + return readType.createRowBatch(capacity, *contents_->pool, enableEncodedBlock_, + useTightNumericVector_); } void ensureOrcFooter(InputStream* stream, DataBuffer* buffer, uint64_t postscriptLength) { @@ -1359,10 +1359,10 @@ namespace orc { std::unique_ptr createReader(std::unique_ptr stream, const ReaderOptions& options) { - auto contents = std::make_shared(); - contents->pool = options.getMemoryPool(); - contents->errorStream = options.getErrorStream(); - contents->readerMetrics = options.getReaderMetrics(); + auto contents_ = std::make_shared(); + contents_->pool = options.getMemoryPool(); + contents_->errorStream = options.getErrorStream(); + contents_->readerMetrics = options.getReaderMetrics(); std::string serializedFooter = options.getSerializedFileTail(); uint64_t fileLength; uint64_t postscriptLength; @@ -1372,8 +1372,8 @@ namespace orc { if (!tail.ParseFromString(serializedFooter)) { throw ParseError("Failed to parse the file tail from string"); } - contents->postscript = std::make_unique(tail.postscript()); - contents->footer = std::make_unique(tail.footer()); + contents_->postscript = std::make_unique(tail.postscript()); + contents_->footer = std::make_unique(tail.footer()); fileLength = tail.file_length(); postscriptLength = tail.postscript_length(); } else { @@ -1385,12 +1385,12 @@ namespace orc { if (readSize < 4) { throw ParseError("File size too small"); } - auto buffer = std::make_unique>(*contents->pool, readSize); + auto buffer = std::make_unique>(*contents_->pool, readSize); stream->read(buffer->data(), readSize, fileLength - readSize); postscriptLength = buffer->data()[readSize - 1] & 0xff; - contents->postscript = readPostscript(stream.get(), buffer.get(), postscriptLength); - uint64_t footerSize = contents->postscript->footer_length(); + contents_->postscript = readPostscript(stream.get(), buffer.get(), postscriptLength); + uint64_t footerSize = contents_->postscript->footer_length(); 
uint64_t tailSize = 1 + postscriptLength + footerSize; if (tailSize >= fileLength) { std::stringstream msg; @@ -1407,18 +1407,18 @@ namespace orc { footerOffset = readSize - tailSize; } - contents->footer = readFooter(stream.get(), buffer.get(), footerOffset, *contents->postscript, - *contents->pool, contents->readerMetrics); + contents_->footer = readFooter(stream.get(), buffer.get(), footerOffset, *contents_->postscript, + *contents_->pool, contents_->readerMetrics); } - contents->isDecimalAsLong = false; - if (contents->postscript->version_size() == 2) { - FileVersion v(contents->postscript->version(0), contents->postscript->version(1)); + contents_->isDecimalAsLong = false; + if (contents_->postscript->version_size() == 2) { + FileVersion v(contents_->postscript->version(0), contents_->postscript->version(1)); if (v == FileVersion::UNSTABLE_PRE_2_0()) { - contents->isDecimalAsLong = true; + contents_->isDecimalAsLong = true; } } - contents->stream = std::move(stream); - return std::make_unique(std::move(contents), options, fileLength, postscriptLength); + contents_->stream = std::move(stream); + return std::make_unique(std::move(contents_), options, fileLength, postscriptLength); } std::map ReaderImpl::getBloomFilters( @@ -1426,13 +1426,13 @@ namespace orc { std::map ret; // find stripe info - if (stripeIndex >= static_cast(footer->stripes_size())) { + if (stripeIndex >= static_cast(footer_->stripes_size())) { throw std::logic_error("Illegal stripe index: " + to_string(static_cast(stripeIndex))); } const proto::StripeInformation currentStripeInfo = - footer->stripes(static_cast(stripeIndex)); - const proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents); + footer_->stripes(static_cast(stripeIndex)); + const proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents_); // iterate stripe footer to get stream of bloom_filter uint64_t offset = static_cast(currentStripeInfo.offset()); @@ -1445,10 +1445,10 
@@ namespace orc { if (stream.kind() == proto::Stream_Kind_BLOOM_FILTER_UTF8 && (included.empty() || included.find(column) != included.end())) { std::unique_ptr pbStream = - createDecompressor(contents->compression, + createDecompressor(contents_->compression, std::make_unique( - contents->stream.get(), offset, length, *contents->pool), - contents->blockSize, *(contents->pool), contents->readerMetrics); + contents_->stream.get(), offset, length, *contents_->pool), + contents_->blockSize, *(contents_->pool), contents_->readerMetrics); proto::BloomFilterIndex pbBFIndex; if (!pbBFIndex.ParseFromZeroCopyStream(pbStream.get())) { diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh index a1367e4bd37..630d812c38f 100644 --- a/c++/src/Reader.hh +++ b/c++/src/Reader.hh @@ -39,17 +39,17 @@ namespace orc { */ class WriterVersionImpl { private: - WriterVersion version; + WriterVersion version_; public: // Known Versions with issues resolved // The static method below is to fix global constructors Clang warning static const WriterVersionImpl& VERSION_HIVE_8732(); - WriterVersionImpl(WriterVersion ver) : version(ver) {} + WriterVersionImpl(WriterVersion ver) : version_(ver) {} bool compareGT(const WriterVersion other) const { - return version > other; + return version_ > other; } }; @@ -80,10 +80,10 @@ namespace orc { class ColumnSelector { private: - std::map nameIdMap; - std::map idTypeMap; - const FileContents* contents; - std::vector columns; + std::map nameIdMap_; + std::map idTypeMap_; + const FileContents* contents_; + std::vector columns_; // build map from type name and id, id to Type void buildTypeNameIdMap(const Type* type); @@ -127,54 +127,54 @@ namespace orc { class RowReaderImpl : public RowReader { private: - const Timezone& localTimezone; + const Timezone& localTimezone_; // contents - std::shared_ptr contents; - const bool throwOnHive11DecimalOverflow; - const int32_t forcedScaleOnHive11Decimal; + std::shared_ptr contents_; + const bool 
throwOnHive11DecimalOverflow_; + const int32_t forcedScaleOnHive11Decimal_; // inputs - std::vector selectedColumns; + std::vector selectedColumns_; // footer - proto::Footer* footer; - DataBuffer firstRowOfStripe; - mutable std::unique_ptr selectedSchema; - bool skipBloomFilters; + proto::Footer* footer_; + DataBuffer firstRowOfStripe_; + mutable std::unique_ptr selectedSchema_; + bool skipBloomFilters_; // reading state - uint64_t previousRow; - uint64_t firstStripe; - uint64_t currentStripe; - uint64_t lastStripe; // the stripe AFTER the last one - uint64_t processingStripe; - uint64_t currentRowInStripe; - uint64_t rowsInCurrentStripe; + uint64_t previousRow_; + uint64_t firstStripe_; + uint64_t currentStripe_; + uint64_t lastStripe_; // the stripe AFTER the last one + uint64_t processingStripe_; + uint64_t currentRowInStripe_; + uint64_t rowsInCurrentStripe_; // number of row groups between first stripe and last stripe - uint64_t numRowGroupsInStripeRange; - proto::StripeInformation currentStripeInfo; - proto::StripeFooter currentStripeFooter; - std::unique_ptr reader; - - bool enableEncodedBlock; - bool useTightNumericVector; - bool throwOnSchemaEvolutionOverflow; + uint64_t numRowGroupsInStripeRange_; + proto::StripeInformation currentStripeInfo_; + proto::StripeFooter currentStripeFooter_; + std::unique_ptr reader_; + + bool enableEncodedBlock_; + bool useTightNumericVector_; + bool throwOnSchemaEvolutionOverflow_; // internal methods void startNextStripe(); inline void markEndOfFile(); // row index of current stripe with column id as the key - std::unordered_map rowIndexes; - std::map bloomFilterIndex; - std::shared_ptr sargs; - std::unique_ptr sargsApplier; + std::unordered_map rowIndexes_; + std::map bloomFilterIndex_; + std::shared_ptr sargs_; + std::unique_ptr sargsApplier_; // desired timezone to return data of timestamp types. 
- const Timezone& readerTimezone; + const Timezone& readerTimezone_; // match read and file types - SchemaEvolution schemaEvolution; + SchemaEvolution schemaEvolution_; // load stripe index if not done so void loadStripeIndex(); @@ -196,7 +196,7 @@ namespace orc { // whether the current stripe is initialized inline bool isCurrentStripeInited() const { - return currentStripe == processingStripe; + return currentStripe_ == processingStripe_; } /** @@ -243,23 +243,23 @@ namespace orc { int32_t getForcedScaleOnHive11Decimal() const; const SchemaEvolution* getSchemaEvolution() const { - return &schemaEvolution; + return &schemaEvolution_; } }; class ReaderImpl : public Reader { private: // FileContents - std::shared_ptr contents; + std::shared_ptr contents_; // inputs - const ReaderOptions options; - const uint64_t fileLength; - const uint64_t postscriptLength; + const ReaderOptions options_; + const uint64_t fileLength_; + const uint64_t postscriptLength_; // footer - proto::Footer* footer; - uint64_t numberOfStripes; + proto::Footer* footer_; + uint64_t numberOfStripes_; uint64_t getMemoryUse(int stripeIx, std::vector& selectedColumns); // internal methods @@ -271,7 +271,7 @@ namespace orc { std::vector >* indexStats) const; // metadata - mutable bool isMetadataLoaded; + mutable bool isMetadataLoaded_; public: /** @@ -341,27 +341,27 @@ namespace orc { bool hasCorrectStatistics() const override; const ReaderMetrics* getReaderMetrics() const override { - return contents->readerMetrics; + return contents_->readerMetrics; } const proto::PostScript* getPostscript() const { - return contents->postscript.get(); + return contents_->postscript.get(); } uint64_t getBlockSize() const { - return contents->blockSize; + return contents_->blockSize; } const proto::Footer* getFooter() const { - return contents->footer.get(); + return contents_->footer.get(); } const Type* getSchema() const { - return contents->schema.get(); + return contents_->schema.get(); } InputStream* getStream() 
const { - return contents->stream.get(); + return contents_->stream.get(); } uint64_t getMemoryUse(int stripeIx = -1) override; diff --git a/c++/src/RleDecoderV2.cc b/c++/src/RleDecoderV2.cc index c03294ecf1e..f35dd4fe5d1 100644 --- a/c++/src/RleDecoderV2.cc +++ b/c++/src/RleDecoderV2.cc @@ -31,17 +31,17 @@ namespace orc { unsigned char RleDecoderV2::readByte() { SCOPED_MINUS_STOPWATCH(metrics, DecodingLatencyUs); - if (bufferStart == bufferEnd) { + if (bufferStart_ == bufferEnd_) { int bufferLength; const void* bufferPointer; - if (!inputStream->Next(&bufferPointer, &bufferLength)) { + if (!inputStream_->Next(&bufferPointer, &bufferLength)) { throw ParseError("bad read in RleDecoderV2::readByte"); } - bufferStart = const_cast(static_cast(bufferPointer)); - bufferEnd = bufferStart + bufferLength; + bufferStart_ = const_cast(static_cast(bufferPointer)); + bufferEnd_ = bufferStart_ + bufferLength; } - unsigned char result = static_cast(*bufferStart++); + unsigned char result = static_cast(*bufferStart_++); return result; } @@ -89,29 +89,29 @@ namespace orc { return dispatch.func(this, data, offset, len, fbs); } - RleDecoderV2::RleDecoderV2(std::unique_ptr input, bool _isSigned, - MemoryPool& pool, ReaderMetrics* _metrics) - : RleDecoder(_metrics), - inputStream(std::move(input)), - isSigned(_isSigned), - firstByte(0), - bufferStart(nullptr), - bufferEnd(bufferStart), - runLength(0), - runRead(0), - bitsLeft(0), - curByte(0), - unpackedPatch(pool, 0), - literals(pool, MAX_LITERAL_SIZE) { + RleDecoderV2::RleDecoderV2(std::unique_ptr input, bool isSigned, + MemoryPool& pool, ReaderMetrics* metrics) + : RleDecoder(metrics), + inputStream_(std::move(input)), + isSigned_(isSigned), + firstByte_(0), + bufferStart_(nullptr), + bufferEnd_(bufferStart_), + runLength_(0), + runRead_(0), + bitsLeft_(0), + curByte_(0), + unpackedPatch_(pool, 0), + literals_(pool, MAX_LITERAL_SIZE) { // PASS } void RleDecoderV2::seek(PositionProvider& location) { // move the input stream - 
inputStream->seek(location); + inputStream_->seek(location); // clear state - bufferEnd = bufferStart = nullptr; - runRead = runLength = 0; + bufferEnd_ = bufferStart_ = nullptr; + runRead_ = runLength_ = 0; // skip ahead the given number of records skip(location.next()); } @@ -142,14 +142,14 @@ namespace orc { } } - if (runRead == runLength) { + if (runRead_ == runLength_) { resetRun(); - firstByte = readByte(); + firstByte_ = readByte(); } uint64_t offset = nRead, length = numValues - nRead; - EncodingType enc = static_cast((firstByte >> 6) & 0x03); + EncodingType enc = static_cast((firstByte_ >> 6) & 0x03); switch (static_cast(enc)) { case SHORT_REPEAT: nRead += nextShortRepeats(data, offset, length, notNull); @@ -184,37 +184,37 @@ namespace orc { template uint64_t RleDecoderV2::nextShortRepeats(T* const data, uint64_t offset, uint64_t numValues, const char* const notNull) { - if (runRead == runLength) { + if (runRead_ == runLength_) { // extract the number of fixed bytes - uint64_t byteSize = (firstByte >> 3) & 0x07; + uint64_t byteSize = (firstByte_ >> 3) & 0x07; byteSize += 1; - runLength = firstByte & 0x07; + runLength_ = firstByte_ & 0x07; // run lengths values are stored only after MIN_REPEAT value is met - runLength += MIN_REPEAT; - runRead = 0; + runLength_ += MIN_REPEAT; + runRead_ = 0; // read the repeated value which is store using fixed bytes - literals[0] = readLongBE(byteSize); + literals_[0] = readLongBE(byteSize); - if (isSigned) { - literals[0] = unZigZag(static_cast(literals[0])); + if (isSigned_) { + literals_[0] = unZigZag(static_cast(literals_[0])); } } - uint64_t nRead = std::min(runLength - runRead, numValues); + uint64_t nRead = std::min(runLength_ - runRead_, numValues); if (notNull) { for (uint64_t pos = offset; pos < offset + nRead; ++pos) { if (notNull[pos]) { - data[pos] = static_cast(literals[0]); - ++runRead; + data[pos] = static_cast(literals_[0]); + ++runRead_; } } } else { for (uint64_t pos = offset; pos < offset + nRead; ++pos) 
{ - data[pos] = static_cast(literals[0]); - ++runRead; + data[pos] = static_cast(literals_[0]); + ++runRead_; } } @@ -224,22 +224,22 @@ namespace orc { template uint64_t RleDecoderV2::nextDirect(T* const data, uint64_t offset, uint64_t numValues, const char* const notNull) { - if (runRead == runLength) { + if (runRead_ == runLength_) { // extract the number of fixed bits - unsigned char fbo = (firstByte >> 1) & 0x1f; + unsigned char fbo = (firstByte_ >> 1) & 0x1f; uint32_t bitSize = decodeBitWidth(fbo); // extract the run length - runLength = static_cast(firstByte & 0x01) << 8; - runLength |= readByte(); + runLength_ = static_cast(firstByte_ & 0x01) << 8; + runLength_ |= readByte(); // runs are one off - runLength += 1; - runRead = 0; + runLength_ += 1; + runRead_ = 0; - readLongs(literals.data(), 0, runLength, bitSize); - if (isSigned) { - for (uint64_t i = 0; i < runLength; ++i) { - literals[i] = unZigZag(static_cast(literals[i])); + readLongs(literals_.data(), 0, runLength_, bitSize); + if (isSigned_) { + for (uint64_t i = 0; i < runLength_; ++i) { + literals_[i] = unZigZag(static_cast(literals_[i])); } } } @@ -250,8 +250,8 @@ namespace orc { void RleDecoderV2::adjustGapAndPatch(uint32_t patchBitSize, int64_t patchMask, int64_t* resGap, int64_t* resPatch, uint64_t* patchIdx) { uint64_t idx = *patchIdx; - uint64_t gap = static_cast(unpackedPatch[idx]) >> patchBitSize; - int64_t patch = unpackedPatch[idx] & patchMask; + uint64_t gap = static_cast(unpackedPatch_[idx]) >> patchBitSize; + int64_t patch = unpackedPatch_[idx] & patchMask; int64_t actualGap = 0; // special case: gap is >255 then patch value will be 0. 
@@ -259,8 +259,8 @@ namespace orc { while (gap == 255 && patch == 0) { actualGap += 255; ++idx; - gap = static_cast(unpackedPatch[idx]) >> patchBitSize; - patch = unpackedPatch[idx] & patchMask; + gap = static_cast(unpackedPatch_[idx]) >> patchBitSize; + patch = unpackedPatch_[idx] & patchMask; } // add the left over gap actualGap += gap; @@ -273,17 +273,17 @@ namespace orc { template uint64_t RleDecoderV2::nextPatched(T* const data, uint64_t offset, uint64_t numValues, const char* const notNull) { - if (runRead == runLength) { + if (runRead_ == runLength_) { // extract the number of fixed bits - unsigned char fbo = (firstByte >> 1) & 0x1f; + unsigned char fbo = (firstByte_ >> 1) & 0x1f; uint32_t bitSize = decodeBitWidth(fbo); // extract the run length - runLength = static_cast(firstByte & 0x01) << 8; - runLength |= readByte(); + runLength_ = static_cast(firstByte_ & 0x01) << 8; + runLength_ |= readByte(); // runs are one off - runLength += 1; - runRead = 0; + runLength_ += 1; + runRead_ = 0; // extract the number of bytes occupied by base uint64_t thirdByte = readByte(); @@ -316,12 +316,12 @@ namespace orc { base = -base; } - readLongs(literals.data(), 0, runLength, bitSize); + readLongs(literals_.data(), 0, runLength_, bitSize); // any remaining bits are thrown out resetReadLongs(); // TODO: something more efficient than resize - unpackedPatch.resize(pl); + unpackedPatch_.resize(pl); // TODO: Skip corrupt? 
// if ((patchBitSize + pgw) > 64 && !skipCorrupt) { if ((patchBitSize + pgw) > 64) { @@ -330,7 +330,7 @@ namespace orc { "(patchBitSize + pgw > 64)!"); } uint32_t cfb = getClosestFixedBits(patchBitSize + pgw); - readLongs(unpackedPatch.data(), 0, pl, cfb); + readLongs(unpackedPatch_.data(), 0, pl, cfb); // any remaining bits are thrown out resetReadLongs(); @@ -342,21 +342,21 @@ namespace orc { uint64_t patchIdx = 0; adjustGapAndPatch(patchBitSize, patchMask, &gap, &patch, &patchIdx); - for (uint64_t i = 0; i < runLength; ++i) { + for (uint64_t i = 0; i < runLength_; ++i) { if (static_cast(i) != gap) { // no patching required. add base to unpacked value to get final value - literals[i] += base; + literals_[i] += base; } else { // extract the patch value - int64_t patchedVal = literals[i] | (patch << bitSize); + int64_t patchedVal = literals_[i] | (patch << bitSize); // add base to patched value - literals[i] = base + patchedVal; + literals_[i] = base + patchedVal; // increment the patch to point to next entry in patch list ++patchIdx; - if (patchIdx < unpackedPatch.size()) { + if (patchIdx < unpackedPatch_.size()) { adjustGapAndPatch(patchBitSize, patchMask, &gap, &patch, &patchIdx); // next gap is relative to the current gap @@ -372,9 +372,9 @@ namespace orc { template uint64_t RleDecoderV2::nextDelta(T* const data, uint64_t offset, uint64_t numValues, const char* const notNull) { - if (runRead == runLength) { + if (runRead_ == runLength_) { // extract the number of fixed bits - unsigned char fbo = (firstByte >> 1) & 0x1f; + unsigned char fbo = (firstByte_ >> 1) & 0x1f; uint32_t bitSize; if (fbo != 0) { bitSize = decodeBitWidth(fbo); @@ -383,20 +383,20 @@ namespace orc { } // extract the run length - runLength = static_cast(firstByte & 0x01) << 8; - runLength |= readByte(); - ++runLength; // account for first value - runRead = 0; + runLength_ = static_cast(firstByte_ & 0x01) << 8; + runLength_ |= readByte(); + ++runLength_; // account for first value + runRead_ = 
0; int64_t prevValue; // read the first value stored as vint - if (isSigned) { + if (isSigned_) { prevValue = readVslong(); } else { prevValue = static_cast(readVulong()); } - literals[0] = prevValue; + literals_[0] = prevValue; // read the fixed delta value stored as vint (deltas can be negative even // if all number are positive) @@ -404,28 +404,28 @@ namespace orc { if (bitSize == 0) { // add fixed deltas to adjacent values - for (uint64_t i = 1; i < runLength; ++i) { - literals[i] = literals[i - 1] + deltaBase; + for (uint64_t i = 1; i < runLength_; ++i) { + literals_[i] = literals_[i - 1] + deltaBase; } } else { - prevValue = literals[1] = prevValue + deltaBase; - if (runLength < 2) { + prevValue = literals_[1] = prevValue + deltaBase; + if (runLength_ < 2) { std::stringstream ss; - ss << "Illegal run length for delta encoding: " << runLength; + ss << "Illegal run length for delta encoding: " << runLength_; throw ParseError(ss.str()); } // write the unpacked values, add it to previous value and store final // value to result buffer. if the delta base value is negative then it // is a decreasing sequence else an increasing sequence. // read deltas using the literals buffer. 
- readLongs(literals.data(), 2, runLength - 2, bitSize); + readLongs(literals_.data(), 2, runLength_ - 2, bitSize); if (deltaBase < 0) { - for (uint64_t i = 2; i < runLength; ++i) { - prevValue = literals[i] = prevValue - literals[i]; + for (uint64_t i = 2; i < runLength_; ++i) { + prevValue = literals_[i] = prevValue - literals_[i]; } } else { - for (uint64_t i = 2; i < runLength; ++i) { - prevValue = literals[i] = prevValue + literals[i]; + for (uint64_t i = 2; i < runLength_; ++i) { + prevValue = literals_[i] = prevValue + literals_[i]; } } } @@ -437,16 +437,16 @@ namespace orc { template uint64_t RleDecoderV2::copyDataFromBuffer(T* data, uint64_t offset, uint64_t numValues, const char* notNull) { - uint64_t nRead = std::min(runLength - runRead, numValues); + uint64_t nRead = std::min(runLength_ - runRead_, numValues); if (notNull) { for (uint64_t i = offset; i < (offset + nRead); ++i) { if (notNull[i]) { - data[i] = static_cast(literals[runRead++]); + data[i] = static_cast(literals_[runRead_++]); } } } else { for (uint64_t i = offset; i < (offset + nRead); ++i) { - data[i] = static_cast(literals[runRead++]); + data[i] = static_cast(literals_[runRead_++]); } } return nRead; diff --git a/c++/src/RleEncoderV2.cc b/c++/src/RleEncoderV2.cc index a75aeac2eb4..18c5200254f 100644 --- a/c++/src/RleEncoderV2.cc +++ b/c++/src/RleEncoderV2.cc @@ -41,11 +41,11 @@ namespace orc { if (!reuseHist) { // histogram that store the encoded bit requirement for each values. 
// maximum number of bits that can encoded is 32 (refer FixedBitSizes) - memset(histgram, 0, FixedBitSizes::SIZE * sizeof(int32_t)); + memset(histgram_, 0, FixedBitSizes::SIZE * sizeof(int32_t)); // compute the histogram for (size_t i = offset; i < (offset + length); i++) { uint32_t idx = encodeBitWidth(findClosestNumBits(data[i])); - histgram[idx] += 1; + histgram_[idx] += 1; } } @@ -53,7 +53,7 @@ namespace orc { // return the bits required by pth percentile length for (int32_t i = HIST_LEN - 1; i >= 0; i--) { - perLen -= histgram[i]; + perLen -= histgram_[i]; if (perLen < 0) { return decodeBitWidth(static_cast(i)); } @@ -64,13 +64,13 @@ namespace orc { RleEncoderV2::RleEncoderV2(std::unique_ptr outStream, bool hasSigned, bool alignBitPacking) : RleEncoder(std::move(outStream), hasSigned), - alignedBitPacking(alignBitPacking), - prevDelta(0) { + alignedBitPacking_(alignBitPacking), + prevDelta_(0) { literals = new int64_t[MAX_LITERAL_SIZE]; - gapVsPatchList = new int64_t[MAX_LITERAL_SIZE]; - zigzagLiterals = hasSigned ? new int64_t[MAX_LITERAL_SIZE] : nullptr; - baseRedLiterals = new int64_t[MAX_LITERAL_SIZE]; - adjDeltas = new int64_t[MAX_LITERAL_SIZE]; + gapVsPatchList_ = new int64_t[MAX_LITERAL_SIZE]; + zigzagLiterals_ = hasSigned ? 
new int64_t[MAX_LITERAL_SIZE] : nullptr; + baseRedLiterals_ = new int64_t[MAX_LITERAL_SIZE]; + adjDeltas_ = new int64_t[MAX_LITERAL_SIZE]; } void RleEncoderV2::write(int64_t val) { @@ -80,39 +80,39 @@ namespace orc { } if (numLiterals == 1) { - prevDelta = val - literals[0]; + prevDelta_ = val - literals[0]; literals[numLiterals++] = val; if (val == literals[0]) { - fixedRunLength = 2; - variableRunLength = 0; + fixedRunLength_ = 2; + variableRunLength_ = 0; } else { - fixedRunLength = 0; - variableRunLength = 2; + fixedRunLength_ = 0; + variableRunLength_ = 2; } return; } int64_t currentDelta = val - literals[numLiterals - 1]; EncodingOption option = {}; - if (prevDelta == 0 && currentDelta == 0) { + if (prevDelta_ == 0 && currentDelta == 0) { // case 1: fixed delta run literals[numLiterals++] = val; - if (variableRunLength > 0) { + if (variableRunLength_ > 0) { // if variable run is non-zero then we are seeing repeating // values at the end of variable run in which case fixed Run // length is 2 - fixedRunLength = 2; + fixedRunLength_ = 2; } - fixedRunLength++; + fixedRunLength_++; // if fixed run met the minimum condition and if variable // run is non-zero then flush the variable run and shift the // tail fixed runs to start of the buffer - if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) { + if (fixedRunLength_ >= MIN_REPEAT && variableRunLength_ > 0) { numLiterals -= MIN_REPEAT; - variableRunLength -= (MIN_REPEAT - 1); + variableRunLength_ -= (MIN_REPEAT - 1); determineEncoding(option); writeValues(option); @@ -124,7 +124,7 @@ namespace orc { numLiterals = MIN_REPEAT; } - if (fixedRunLength == MAX_LITERAL_SIZE) { + if (fixedRunLength_ == MAX_LITERAL_SIZE) { option.encoding = DELTA; option.isFixedDelta = true; writeValues(option); @@ -137,8 +137,8 @@ namespace orc { // if fixed run length is non-zero and if it satisfies the // short repeat conditions then write the values as short repeats // else use delta encoding - if (fixedRunLength >= MIN_REPEAT) { 
- if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) { + if (fixedRunLength_ >= MIN_REPEAT) { + if (fixedRunLength_ <= MAX_SHORT_REPEAT_LENGTH) { option.encoding = SHORT_REPEAT; } else { option.encoding = DELTA; @@ -149,20 +149,20 @@ namespace orc { // if fixed run length is 0 && fixedRunLength < MIN_REPEAT && val != literals[numLiterals - 1]) { - variableRunLength = fixedRunLength; - fixedRunLength = 0; + if (fixedRunLength_ > 0 && fixedRunLength_ < MIN_REPEAT && val != literals[numLiterals - 1]) { + variableRunLength_ = fixedRunLength_; + fixedRunLength_ = 0; } // after writing values re-initialize the variables if (numLiterals == 0) { initializeLiterals(val); } else { - prevDelta = val - literals[numLiterals - 1]; + prevDelta_ = val - literals[numLiterals - 1]; literals[numLiterals++] = val; - variableRunLength++; + variableRunLength_++; - if (variableRunLength == MAX_LITERAL_SIZE) { + if (variableRunLength_ == MAX_LITERAL_SIZE) { determineEncoding(option); writeValues(option); } @@ -172,7 +172,7 @@ namespace orc { void RleEncoderV2::computeZigZagLiterals(EncodingOption& option) { assert(isSigned); for (size_t i = 0; i < numLiterals; i++) { - zigzagLiterals[option.zigzagLiteralsCount++] = zigZag(literals[i]); + zigzagLiterals_[option.zigzagLiteralsCount++] = zigZag(literals[i]); } } @@ -207,7 +207,7 @@ namespace orc { for (size_t i = 0; i < numLiterals; i++) { // if value is above mask then create the patch and record the gap - if (baseRedLiterals[i] > mask) { + if (baseRedLiterals_[i] > mask) { size_t gap = i - prev; if (gap > maxGap) { maxGap = gap; @@ -219,12 +219,12 @@ namespace orc { gapIdx++; // extract the most significant bits that are over mask bits - int64_t patch = baseRedLiterals[i] >> option.brBits95p; + int64_t patch = baseRedLiterals_[i] >> option.brBits95p; patchList.push_back(patch); patchIdx++; // strip off the MSB to enable safe bit packing - baseRedLiterals[i] &= mask; + baseRedLiterals_[i] &= mask; } } @@ -268,13 +268,13 @@ namespace orc { 
int64_t g = gapList[gapIdx++]; int64_t p = patchList[patchIdx++]; while (g > 255) { - gapVsPatchList[option.gapVsPatchListCount++] = (255L << option.patchWidth); + gapVsPatchList_[option.gapVsPatchListCount++] = (255L << option.patchWidth); i++; g -= 255; } // store patch value in LSBs and gap in MSBs - gapVsPatchList[option.gapVsPatchListCount++] = ((g << option.patchWidth) | p); + gapVsPatchList_[option.gapVsPatchListCount++] = ((g << option.patchWidth) | p); } } @@ -287,7 +287,7 @@ namespace orc { if (isSigned) { computeZigZagLiterals(option); } - int64_t* currentZigzagLiterals = isSigned ? zigzagLiterals : literals; + int64_t* currentZigzagLiterals = isSigned ? zigzagLiterals_ : literals; option.zzBits100p = percentileBits(currentZigzagLiterals, 0, numLiterals, 1.0); return currentZigzagLiterals; } @@ -318,7 +318,7 @@ namespace orc { int64_t initialDelta = literals[1] - literals[0]; int64_t currDelta = 0; int64_t deltaMax = 0; - adjDeltas[option.adjDeltasCount++] = initialDelta; + adjDeltas_[option.adjDeltasCount++] = initialDelta; for (size_t i = 1; i < numLiterals; i++) { const int64_t l1 = literals[i]; @@ -332,8 +332,8 @@ namespace orc { option.isFixedDelta &= (currDelta == initialDelta); if (i > 1) { - adjDeltas[option.adjDeltasCount++] = std::abs(currDelta); - deltaMax = std::max(deltaMax, adjDeltas[i - 1]); + adjDeltas_[option.adjDeltasCount++] = std::abs(currDelta); + deltaMax = std::max(deltaMax, adjDeltas_[i - 1]); } } @@ -407,15 +407,15 @@ namespace orc { // patching is done only on base reduced values. 
// remove base from literals for (size_t i = 0; i < numLiterals; i++) { - baseRedLiterals[option.baseRedLiteralsCount++] = (literals[i] - option.min); + baseRedLiterals_[option.baseRedLiteralsCount++] = (literals[i] - option.min); } // 95th percentile width is used to determine max allowed value // after which patching will be done - option.brBits95p = percentileBits(baseRedLiterals, 0, numLiterals, 0.95); + option.brBits95p = percentileBits(baseRedLiterals_, 0, numLiterals, 0.95); // 100th percentile is used to compute the max patch width - option.brBits100p = percentileBits(baseRedLiterals, 0, numLiterals, 1.0, true); + option.brBits100p = percentileBits(baseRedLiterals_, 0, numLiterals, 1.0, true); // after base reducing the values, if the difference in bits between // 95th percentile and 100th percentile value is zero then there @@ -442,16 +442,16 @@ namespace orc { uint64_t RleEncoderV2::flush() { if (numLiterals != 0) { EncodingOption option = {}; - if (variableRunLength != 0) { + if (variableRunLength_ != 0) { determineEncoding(option); writeValues(option); - } else if (fixedRunLength != 0) { - if (fixedRunLength < MIN_REPEAT) { - variableRunLength = fixedRunLength; - fixedRunLength = 0; + } else if (fixedRunLength_ != 0) { + if (fixedRunLength_ < MIN_REPEAT) { + variableRunLength_ = fixedRunLength_; + fixedRunLength_ = 0; determineEncoding(option); writeValues(option); - } else if (fixedRunLength >= MIN_REPEAT && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) { + } else if (fixedRunLength_ >= MIN_REPEAT && fixedRunLength_ <= MAX_SHORT_REPEAT_LENGTH) { option.encoding = SHORT_REPEAT; writeValues(option); } else { @@ -488,7 +488,7 @@ namespace orc { } numLiterals = 0; - prevDelta = 0; + prevDelta_ = 0; } } @@ -506,8 +506,8 @@ namespace orc { uint32_t header = getOpCode(SHORT_REPEAT); - fixedRunLength -= MIN_REPEAT; - header |= fixedRunLength; + fixedRunLength_ -= MIN_REPEAT; + header |= fixedRunLength_; header |= ((numBytesRepeatVal - 1) << 3); 
writeByte(static_cast(header)); @@ -517,40 +517,40 @@ namespace orc { writeByte(static_cast(b)); } - fixedRunLength = 0; + fixedRunLength_ = 0; } void RleEncoderV2::writeDirectValues(EncodingOption& option) { // write the number of fixed bits required in next 5 bits uint32_t fb = option.zzBits100p; - if (alignedBitPacking) { + if (alignedBitPacking_) { fb = getClosestAlignedFixedBits(fb); } const uint32_t efb = encodeBitWidth(fb) << 1; // adjust variable run length - variableRunLength -= 1; + variableRunLength_ -= 1; // extract the 9th bit of run length - const uint32_t tailBits = (variableRunLength & 0x100) >> 8; + const uint32_t tailBits = (variableRunLength_ & 0x100) >> 8; // create first byte of the header const char headerFirstByte = static_cast(getOpCode(DIRECT) | efb | tailBits); // second byte of the header stores the remaining 8 bits of runlength - const char headerSecondByte = static_cast(variableRunLength & 0xff); + const char headerSecondByte = static_cast(variableRunLength_ & 0xff); // write header writeByte(headerFirstByte); writeByte(headerSecondByte); // bit packing the zigzag encoded literals - int64_t* currentZigzagLiterals = isSigned ? zigzagLiterals : literals; + int64_t* currentZigzagLiterals = isSigned ? 
zigzagLiterals_ : literals; writeInts(currentZigzagLiterals, 0, numLiterals, fb); // reset run length - variableRunLength = 0; + variableRunLength_ = 0; } void RleEncoderV2::writePatchedBasedValues(EncodingOption& option) { @@ -565,16 +565,16 @@ namespace orc { const uint32_t efb = encodeBitWidth(option.brBits95p) << 1; // adjust variable run length, they are one off - variableRunLength -= 1; + variableRunLength_ -= 1; // extract the 9th bit of run length - const uint32_t tailBits = (variableRunLength & 0x100) >> 8; + const uint32_t tailBits = (variableRunLength_ & 0x100) >> 8; // create first byte of the header const char headerFirstByte = static_cast(getOpCode(PATCHED_BASE) | efb | tailBits); // second byte of the header stores the remaining 8 bits of runlength - const char headerSecondByte = static_cast(variableRunLength & 0xff); + const char headerSecondByte = static_cast(variableRunLength_ & 0xff); // if the min value is negative toggle the sign const bool isNegative = (option.min < 0); @@ -618,15 +618,15 @@ namespace orc { // base reduced literals are bit packed uint32_t closestFixedBits = getClosestFixedBits(option.brBits95p); - writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits); + writeInts(baseRedLiterals_, 0, numLiterals, closestFixedBits); // write patch list closestFixedBits = getClosestFixedBits(option.patchGapWidth + option.patchWidth); - writeInts(gapVsPatchList, 0, option.patchLength, closestFixedBits); + writeInts(gapVsPatchList_, 0, option.patchLength, closestFixedBits); // reset run length - variableRunLength = 0; + variableRunLength_ = 0; } void RleEncoderV2::writeDeltaValues(EncodingOption& option) { @@ -634,7 +634,7 @@ namespace orc { uint32_t fb = option.bitsDeltaMax; uint32_t efb = 0; - if (alignedBitPacking) { + if (alignedBitPacking_) { fb = getClosestAlignedFixedBits(fb); } @@ -642,14 +642,14 @@ namespace orc { // if fixed run length is greater than threshold then it will be fixed // delta sequence with delta value 0 else fixed 
delta sequence with // non-zero delta value - if (fixedRunLength > MIN_REPEAT) { + if (fixedRunLength_ > MIN_REPEAT) { // ex. sequence: 2 2 2 2 2 2 2 2 - len = fixedRunLength - 1; - fixedRunLength = 0; + len = fixedRunLength_ - 1; + fixedRunLength_ = 0; } else { // ex. sequence: 4 6 8 10 12 14 16 - len = variableRunLength - 1; - variableRunLength = 0; + len = variableRunLength_ - 1; + variableRunLength_ = 0; } } else { // fixed width 0 is used for long repeating values. @@ -658,8 +658,8 @@ namespace orc { fb = 2; } efb = encodeBitWidth(fb) << 1; - len = variableRunLength - 1; - variableRunLength = 0; + len = variableRunLength_ - 1; + variableRunLength_ = 0; } // extract the 9th bit of run length @@ -687,13 +687,13 @@ namespace orc { writeVslong(option.fixedDelta); } else { // store the first value as delta value using zigzag encoding - writeVslong(adjDeltas[0]); + writeVslong(adjDeltas_[0]); // adjacent delta values are bit packed. The length of adjDeltas array is // always one less than the number of literals (delta difference for n // elements is n-1). 
We have already written one element, write the // remaining numLiterals - 2 elements here - writeInts(adjDeltas, 1, numLiterals - 2, fb); + writeInts(adjDeltas_, 1, numLiterals - 2, fb); } } @@ -776,7 +776,7 @@ namespace orc { void RleEncoderV2::initializeLiterals(int64_t val) { literals[numLiterals++] = val; - fixedRunLength = 1; - variableRunLength = 1; + fixedRunLength_ = 1; + variableRunLength_ = 1; } } // namespace orc diff --git a/c++/src/SchemaEvolution.cc b/c++/src/SchemaEvolution.cc index dc30e611827..4641bf47423 100644 --- a/c++/src/SchemaEvolution.cc +++ b/c++/src/SchemaEvolution.cc @@ -21,20 +21,20 @@ namespace orc { - SchemaEvolution::SchemaEvolution(const std::shared_ptr& _readType, const Type* fileType) - : readType(_readType) { + SchemaEvolution::SchemaEvolution(const std::shared_ptr& readType, const Type* fileType) + : readType_(readType) { if (readType) { buildConversion(readType.get(), fileType); } else { for (uint64_t i = 0; i <= fileType->getMaximumColumnId(); ++i) { - safePPDConversionMap.insert(i); + safePPDConversionMap_.insert(i); } } } const Type* SchemaEvolution::getReadType(const Type& fileType) const { - auto ret = readTypeMap.find(fileType.getColumnId()); - return ret == readTypeMap.cend() ? &fileType : ret->second; + auto ret = readTypeMap_.find(fileType.getColumnId()); + return ret == readTypeMap_.cend() ? 
&fileType : ret->second; } inline void invalidConversion(const Type* readType, const Type* fileType) { @@ -127,22 +127,22 @@ namespace orc { return ret; } - void SchemaEvolution::buildConversion(const Type* _readType, const Type* fileType) { + void SchemaEvolution::buildConversion(const Type* readType, const Type* fileType) { if (fileType == nullptr) { - throw SchemaEvolutionError("File does not have " + _readType->toString()); + throw SchemaEvolutionError("File does not have " + readType->toString()); } - auto [valid, convert] = checkConversion(*_readType, *fileType); + auto [valid, convert] = checkConversion(*readType, *fileType); if (!valid) { - invalidConversion(_readType, fileType); + invalidConversion(readType, fileType); } - readTypeMap.emplace(_readType->getColumnId(), convert ? _readType : fileType); + readTypeMap_.emplace(readType->getColumnId(), convert ? readType : fileType); // check whether PPD conversion is safe - buildSafePPDConversionMap(_readType, fileType); + buildSafePPDConversionMap(readType, fileType); - for (uint64_t i = 0; i < _readType->getSubtypeCount(); ++i) { - auto subType = _readType->getSubtype(i); + for (uint64_t i = 0; i < readType->getSubtypeCount(); ++i) { + auto subType = readType->getSubtype(i); if (subType) { // null subType means that this is a sub column of map/list type // and it does not exist in the file. simply skip it. 
@@ -165,17 +165,17 @@ namespace orc { return kind != STRUCT && kind != MAP && kind != LIST && kind != UNION; } - void SchemaEvolution::buildSafePPDConversionMap(const Type* _readType, const Type* fileType) { - if (_readType == nullptr || !isPrimitive(_readType) || fileType == nullptr || + void SchemaEvolution::buildSafePPDConversionMap(const Type* readType, const Type* fileType) { + if (readType == nullptr || !isPrimitive(readType) || fileType == nullptr || !isPrimitive(fileType)) { return; } bool isSafe = false; - if (_readType == fileType) { + if (readType == fileType) { // short cut for same type isSafe = true; - } else if (_readType->getKind() == DECIMAL && fileType->getKind() == DECIMAL) { + } else if (readType->getKind() == DECIMAL && fileType->getKind() == DECIMAL) { // for decimals alone do equality check to not mess up with precision change if (fileType->getPrecision() == readType->getPrecision() && fileType->getScale() == readType->getScale()) { @@ -245,12 +245,12 @@ namespace orc { } if (isSafe) { - safePPDConversionMap.insert(fileType->getColumnId()); + safePPDConversionMap_.insert(fileType->getColumnId()); } } bool SchemaEvolution::isSafePPDConversion(uint64_t columnId) const { - return safePPDConversionMap.find(columnId) != safePPDConversionMap.cend(); + return safePPDConversionMap_.find(columnId) != safePPDConversionMap_.cend(); } } // namespace orc diff --git a/c++/src/SchemaEvolution.hh b/c++/src/SchemaEvolution.hh index ef9020eba48..c3deff72363 100644 --- a/c++/src/SchemaEvolution.hh +++ b/c++/src/SchemaEvolution.hh @@ -46,7 +46,7 @@ namespace orc { // return selected read type const Type* getReadType() const { - return readType.get(); + return readType_.get(); } private: @@ -54,9 +54,9 @@ namespace orc { void buildSafePPDConversionMap(const Type* readType, const Type* fileType); private: - const std::shared_ptr readType; - std::unordered_map readTypeMap; - std::unordered_set safePPDConversionMap; + const std::shared_ptr readType_; + 
std::unordered_map readTypeMap_; + std::unordered_set safePPDConversionMap_; }; } // namespace orc diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc index 8ed29d0e7c1..f9581215b32 100644 --- a/c++/src/Statistics.cc +++ b/c++/src/Statistics.cc @@ -52,18 +52,18 @@ namespace orc { StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats, const StatContext& statContext) { for (int i = 0; i < stripeStats.col_stats_size(); i++) { - colStats.push_back(convertColumnStatistics(stripeStats.col_stats(i), statContext)); + colStats_.push_back(convertColumnStatistics(stripeStats.col_stats(i), statContext)); } } StatisticsImpl::StatisticsImpl(const proto::Footer& footer, const StatContext& statContext) { for (int i = 0; i < footer.statistics_size(); i++) { - colStats.push_back(convertColumnStatistics(footer.statistics(i), statContext)); + colStats_.push_back(convertColumnStatistics(footer.statistics(i), statContext)); } } StatisticsImpl::~StatisticsImpl() { - for (std::vector::iterator ptr = colStats.begin(); ptr != colStats.end(); + for (std::vector::iterator ptr = colStats_.begin(); ptr != colStats_.end(); ++ptr) { delete *ptr; } @@ -85,11 +85,11 @@ namespace orc { const proto::StripeStatistics& stripeStats, std::vector >& indexStats, const StatContext& statContext) { - columnStats = std::make_unique(stripeStats, statContext); - rowIndexStats.resize(indexStats.size()); - for (size_t i = 0; i < rowIndexStats.size(); i++) { + columnStats_ = std::make_unique(stripeStats, statContext); + rowIndexStats_.resize(indexStats.size()); + for (size_t i = 0; i < rowIndexStats_.size(); i++) { for (size_t j = 0; j < indexStats[i].size(); j++) { - rowIndexStats[i].push_back(std::shared_ptr( + rowIndexStats_[i].push_back(std::shared_ptr( convertColumnStatistics(indexStats[i][j], statContext))); } } @@ -180,205 +180,205 @@ namespace orc { } ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.number_of_values()); 
- _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); } BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (pb.has_binary_statistics() && statContext.correctStats) { - _stats.setHasTotalLength(pb.binary_statistics().has_sum()); - _stats.setTotalLength(static_cast(pb.binary_statistics().sum())); + stats_.setHasTotalLength(pb.binary_statistics().has_sum()); + stats_.setTotalLength(static_cast(pb.binary_statistics().sum())); } } BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (pb.has_bucket_statistics() && statContext.correctStats) { - _hasCount = true; - _trueCount = pb.bucket_statistics().count(0); + hasCount_ = true; + trueCount_ = pb.bucket_statistics().count(0); } else { - _hasCount = false; - _trueCount = 0; + hasCount_ = false; + trueCount_ = 0; } } DateColumnStatisticsImpl::DateColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (!pb.has_date_statistics() || !statContext.correctStats) { // hasMinimum_ is false by default; // hasMaximum_ is false by default; - _stats.setMinimum(0); - _stats.setMaximum(0); + stats_.setMinimum(0); + stats_.setMaximum(0); } else { - _stats.setHasMinimum(pb.date_statistics().has_minimum()); - _stats.setHasMaximum(pb.date_statistics().has_maximum()); - 
_stats.setMinimum(pb.date_statistics().minimum()); - _stats.setMaximum(pb.date_statistics().maximum()); + stats_.setHasMinimum(pb.date_statistics().has_minimum()); + stats_.setHasMaximum(pb.date_statistics().has_maximum()); + stats_.setMinimum(pb.date_statistics().minimum()); + stats_.setMaximum(pb.date_statistics().maximum()); } } DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (pb.has_decimal_statistics() && statContext.correctStats) { const proto::DecimalStatistics& stats = pb.decimal_statistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasSum(stats.has_sum()); + stats_.setHasMinimum(stats.has_minimum()); + stats_.setHasMaximum(stats.has_maximum()); + stats_.setHasSum(stats.has_sum()); - _stats.setMinimum(Decimal(stats.minimum())); - _stats.setMaximum(Decimal(stats.maximum())); - _stats.setSum(Decimal(stats.sum())); + stats_.setMinimum(Decimal(stats.minimum())); + stats_.setMaximum(Decimal(stats.maximum())); + stats_.setSum(Decimal(stats.sum())); } } DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (!pb.has_double_statistics()) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _stats.setSum(0); + stats_.setMinimum(0); + stats_.setMaximum(0); + stats_.setSum(0); } else { const proto::DoubleStatistics& stats = pb.double_statistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasSum(stats.has_sum()); + stats_.setHasMinimum(stats.has_minimum()); + 
stats_.setHasMaximum(stats.has_maximum()); + stats_.setHasSum(stats.has_sum()); - _stats.setMinimum(stats.minimum()); - _stats.setMaximum(stats.maximum()); - _stats.setSum(stats.sum()); + stats_.setMinimum(stats.minimum()); + stats_.setMaximum(stats.maximum()); + stats_.setSum(stats.sum()); } } IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (!pb.has_int_statistics()) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _stats.setSum(0); + stats_.setMinimum(0); + stats_.setMaximum(0); + stats_.setSum(0); } else { const proto::IntegerStatistics& stats = pb.int_statistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasSum(stats.has_sum()); + stats_.setHasMinimum(stats.has_minimum()); + stats_.setHasMaximum(stats.has_maximum()); + stats_.setHasSum(stats.has_sum()); - _stats.setMinimum(stats.minimum()); - _stats.setMaximum(stats.maximum()); - _stats.setSum(stats.sum()); + stats_.setMinimum(stats.minimum()); + stats_.setMaximum(stats.maximum()); + stats_.setSum(stats.sum()); } } StringColumnStatisticsImpl::StringColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (!pb.has_string_statistics() || !statContext.correctStats) { - _stats.setTotalLength(0); + stats_.setTotalLength(0); } else { const proto::StringStatistics& stats = pb.string_statistics(); - _stats.setHasMinimum(stats.has_minimum()); - _stats.setHasMaximum(stats.has_maximum()); - _stats.setHasTotalLength(stats.has_sum()); + stats_.setHasMinimum(stats.has_minimum()); + stats_.setHasMaximum(stats.has_maximum()); + 
stats_.setHasTotalLength(stats.has_sum()); - _stats.setMinimum(stats.minimum()); - _stats.setMaximum(stats.maximum()); - _stats.setTotalLength(static_cast(stats.sum())); + stats_.setMinimum(stats.minimum()); + stats_.setMaximum(stats.maximum()); + stats_.setTotalLength(static_cast(stats.sum())); } } TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (!pb.has_timestamp_statistics() || !statContext.correctStats) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _lowerBound = 0; - _upperBound = 0; - _minimumNanos = DEFAULT_MIN_NANOS; - _maximumNanos = DEFAULT_MAX_NANOS; + stats_.setMinimum(0); + stats_.setMaximum(0); + lowerBound_ = 0; + upperBound_ = 0; + minimumNanos_ = DEFAULT_MIN_NANOS; + maximumNanos_ = DEFAULT_MAX_NANOS; } else { const proto::TimestampStatistics& stats = pb.timestamp_statistics(); - _stats.setHasMinimum(stats.has_minimum_utc() || + stats_.setHasMinimum(stats.has_minimum_utc() || (stats.has_minimum() && (statContext.writerTimezone != nullptr))); - _stats.setHasMaximum(stats.has_maximum_utc() || + stats_.setHasMaximum(stats.has_maximum_utc() || (stats.has_maximum() && (statContext.writerTimezone != nullptr))); - _hasLowerBound = stats.has_minimum_utc() || stats.has_minimum(); - _hasUpperBound = stats.has_maximum_utc() || stats.has_maximum(); + hasLowerBound_ = stats.has_minimum_utc() || stats.has_minimum(); + hasUpperBound_ = stats.has_maximum_utc() || stats.has_maximum(); // to be consistent with java side, non-default minimum_nanos and maximum_nanos // are added by one in their serialized form. - _minimumNanos = stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 : DEFAULT_MIN_NANOS; - _maximumNanos = stats.has_maximum_nanos() ? 
stats.maximum_nanos() - 1 : DEFAULT_MAX_NANOS; + minimumNanos_ = stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 : DEFAULT_MIN_NANOS; + maximumNanos_ = stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 : DEFAULT_MAX_NANOS; // Timestamp stats are stored in milliseconds if (stats.has_minimum_utc()) { int64_t minimum = stats.minimum_utc(); - _stats.setMinimum(minimum); - _lowerBound = minimum; + stats_.setMinimum(minimum); + lowerBound_ = minimum; } else if (statContext.writerTimezone) { int64_t writerTimeSec = stats.minimum() / 1000; // multiply the offset by 1000 to convert to millisecond int64_t minimum = stats.minimum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000; - _stats.setMinimum(minimum); - _lowerBound = minimum; + stats_.setMinimum(minimum); + lowerBound_ = minimum; } else { - _stats.setMinimum(0); + stats_.setMinimum(0); // subtract 1 day 1 hour (25 hours) in milliseconds to handle unknown // TZ and daylight savings - _lowerBound = stats.minimum() - (25 * SECONDS_PER_HOUR * 1000); + lowerBound_ = stats.minimum() - (25 * SECONDS_PER_HOUR * 1000); } // Timestamp stats are stored in milliseconds if (stats.has_maximum_utc()) { int64_t maximum = stats.maximum_utc(); - _stats.setMaximum(maximum); - _upperBound = maximum; + stats_.setMaximum(maximum); + upperBound_ = maximum; } else if (statContext.writerTimezone) { int64_t writerTimeSec = stats.maximum() / 1000; // multiply the offset by 1000 to convert to millisecond int64_t maximum = stats.maximum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000; - _stats.setMaximum(maximum); - _upperBound = maximum; + stats_.setMaximum(maximum); + upperBound_ = maximum; } else { - _stats.setMaximum(0); + stats_.setMaximum(0); // add 1 day 1 hour (25 hours) in milliseconds to handle unknown // TZ and daylight savings - _upperBound = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000); + upperBound_ = stats.maximum() + (25 * SECONDS_PER_HOUR * 1000); } // Add 1 
millisecond to account for microsecond precision of values - _upperBound += 1; + upperBound_ += 1; } } CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl( const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.number_of_values()); - _stats.setHasNull(pb.has_null()); + stats_.setNumberOfValues(pb.number_of_values()); + stats_.setHasNull(pb.has_null()); if (!pb.has_collection_statistics()) { - _stats.setMinimum(0); - _stats.setMaximum(0); - _stats.setSum(0); + stats_.setMinimum(0); + stats_.setMaximum(0); + stats_.setSum(0); } else { const proto::CollectionStatistics& stats = pb.collection_statistics(); - _stats.setHasMinimum(stats.has_min_children()); - _stats.setHasMaximum(stats.has_max_children()); - _stats.setHasSum(stats.has_total_children()); + stats_.setHasMinimum(stats.has_min_children()); + stats_.setHasMaximum(stats.has_max_children()); + stats_.setHasSum(stats.has_total_children()); - _stats.setMinimum(stats.min_children()); - _stats.setMaximum(stats.max_children()); - _stats.setSum(stats.total_children()); + stats_.setMinimum(stats.min_children()); + stats_.setMaximum(stats.max_children()); + stats_.setSum(stats.total_children()); } } diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh index e585bf971cc..6f212c15ccd 100644 --- a/c++/src/Statistics.hh +++ b/c++/src/Statistics.hh @@ -48,160 +48,160 @@ namespace orc { template class InternalStatisticsImpl { private: - bool _hasNull; - bool _hasMinimum; - bool _hasMaximum; - bool _hasSum; - bool _hasTotalLength; - uint64_t _totalLength; - uint64_t _valueCount; - T _minimum; - T _maximum; - T _sum; + bool hasNull_; + bool hasMinimum_; + bool hasMaximum_; + bool hasSum_; + bool hasTotalLength_; + uint64_t totalLength_; + uint64_t valueCount_; + T minimum_; + T maximum_; + T sum_; public: InternalStatisticsImpl() { - _hasNull = false; - _hasMinimum = false; - _hasMaximum = false; - _hasSum = false; - _hasTotalLength = false; - _totalLength = 0; - _valueCount = 0; + hasNull_ = 
false; + hasMinimum_ = false; + hasMaximum_ = false; + hasSum_ = false; + hasTotalLength_ = false; + totalLength_ = 0; + valueCount_ = 0; } ~InternalStatisticsImpl() {} // GET / SET _totalLength bool hasTotalLength() const { - return _hasTotalLength; + return hasTotalLength_; } void setHasTotalLength(bool hasTotalLength) { - _hasTotalLength = hasTotalLength; + hasTotalLength_ = hasTotalLength; } uint64_t getTotalLength() const { - return _totalLength; + return totalLength_; } void setTotalLength(uint64_t totalLength) { - _totalLength = totalLength; + totalLength_ = totalLength; } // GET / SET _sum bool hasSum() const { - return _hasSum; + return hasSum_; } void setHasSum(bool hasSum) { - _hasSum = hasSum; + hasSum_ = hasSum; } T getSum() const { - return _sum; + return sum_; } void setSum(T sum) { - _sum = sum; + sum_ = sum; } // GET / SET _maximum bool hasMaximum() const { - return _hasMaximum; + return hasMaximum_; } const T& getMaximum() const { - return _maximum; + return maximum_; } void setHasMaximum(bool hasMax) { - _hasMaximum = hasMax; + hasMaximum_ = hasMax; } void setMaximum(T max) { - _maximum = max; + maximum_ = max; } // GET / SET _minimum bool hasMinimum() const { - return _hasMinimum; + return hasMinimum_; } void setHasMinimum(bool hasMin) { - _hasMinimum = hasMin; + hasMinimum_ = hasMin; } const T& getMinimum() const { - return _minimum; + return minimum_; } void setMinimum(T min) { - _minimum = min; + minimum_ = min; } // GET / SET _valueCount uint64_t getNumberOfValues() const { - return _valueCount; + return valueCount_; } void setNumberOfValues(uint64_t numValues) { - _valueCount = numValues; + valueCount_ = numValues; } // GET / SET _hasNullValue bool hasNull() const { - return _hasNull; + return hasNull_; } void setHasNull(bool hasNull) { - _hasNull = hasNull; + hasNull_ = hasNull; } void reset() { - _hasNull = false; - _hasMinimum = false; - _hasMaximum = false; - _hasSum = false; - _hasTotalLength = false; - _totalLength = 0; - _valueCount 
= 0; + hasNull_ = false; + hasMinimum_ = false; + hasMaximum_ = false; + hasSum_ = false; + hasTotalLength_ = false; + totalLength_ = 0; + valueCount_ = 0; } void updateMinMax(T value) { - if (!_hasMinimum) { - _hasMinimum = _hasMaximum = true; - _minimum = _maximum = value; - } else if (compare(value, _minimum)) { - _minimum = value; - } else if (compare(_maximum, value)) { - _maximum = value; + if (!hasMinimum_) { + hasMinimum_ = hasMaximum_ = true; + minimum_ = maximum_ = value; + } else if (compare(value, minimum_)) { + minimum_ = value; + } else if (compare(maximum_, value)) { + maximum_ = value; } } // sum is not merged here as we need to check overflow void merge(const InternalStatisticsImpl& other) { - _hasNull = _hasNull || other._hasNull; - _valueCount += other._valueCount; - - if (other._hasMinimum) { - if (!_hasMinimum) { - _hasMinimum = _hasMaximum = true; - _minimum = other._minimum; - _maximum = other._maximum; + hasNull_ = hasNull_ || other.hasNull_; + valueCount_ += other.valueCount_; + + if (other.hasMinimum_) { + if (!hasMinimum_) { + hasMinimum_ = hasMaximum_ = true; + minimum_ = other.minimum_; + maximum_ = other.maximum_; } else { // all template types should support operator< - if (compare(_maximum, other._maximum)) { - _maximum = other._maximum; + if (compare(maximum_, other.maximum_)) { + maximum_ = other.maximum_; } - if (compare(other._minimum, _minimum)) { - _minimum = other._minimum; + if (compare(other.minimum_, minimum_)) { + minimum_ = other.minimum_; } } } - _hasTotalLength = _hasTotalLength && other._hasTotalLength; - _totalLength += other._totalLength; + hasTotalLength_ = hasTotalLength_ && other.hasTotalLength_; + totalLength_ += other.totalLength_; } }; @@ -240,7 +240,7 @@ namespace orc { class ColumnStatisticsImpl : public ColumnStatistics, public MutableColumnStatistics { private: - InternalCharStatistics _stats; + InternalCharStatistics stats_; public: ColumnStatisticsImpl() { @@ -250,36 +250,36 @@ namespace orc { virtual 
~ColumnStatisticsImpl() override; uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } void merge(const MutableColumnStatistics& other) override { - _stats.merge(dynamic_cast(other)._stats); + stats_.merge(dynamic_cast(other).stats_); } void reset() override { - _stats.reset(); + stats_.reset(); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); } std::string toString() const override { @@ -292,7 +292,7 @@ namespace orc { class BinaryColumnStatisticsImpl : public BinaryColumnStatistics, public MutableColumnStatistics { private: - InternalCharStatistics _stats; + InternalCharStatistics stats_; public: BinaryColumnStatisticsImpl() { @@ -303,63 +303,63 @@ namespace orc { virtual ~BinaryColumnStatisticsImpl() override; uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } bool hasNull() const override { - return _stats.hasNull(); + return 
stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } bool hasTotalLength() const override { - return _stats.hasTotalLength(); + return stats_.hasTotalLength(); } uint64_t getTotalLength() const override { if (hasTotalLength()) { - return _stats.getTotalLength(); + return stats_.getTotalLength(); } else { throw ParseError("Total length is not defined."); } } void setTotalLength(uint64_t length) { - _stats.setHasTotalLength(true); - _stats.setTotalLength(length); + stats_.setHasTotalLength(true); + stats_.setTotalLength(length); } void update(size_t length) { - _stats.setTotalLength(_stats.getTotalLength() + length); + stats_.setTotalLength(stats_.getTotalLength() + length); } void merge(const MutableColumnStatistics& other) override { const BinaryColumnStatisticsImpl& binStats = dynamic_cast(other); - _stats.merge(binStats._stats); + stats_.merge(binStats.stats_); } void reset() override { - _stats.reset(); + stats_.reset(); setTotalLength(0); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::BinaryStatistics* binStats = pbStats.mutable_binary_statistics(); - binStats->set_sum(static_cast(_stats.getTotalLength())); + binStats->set_sum(static_cast(stats_.getTotalLength())); } std::string toString() const override { @@ -379,9 +379,9 @@ namespace orc { class BooleanColumnStatisticsImpl : public BooleanColumnStatistics, public MutableColumnStatistics { private: - InternalBooleanStatistics _stats; - bool _hasCount; - uint64_t _trueCount; + InternalBooleanStatistics stats_; + bool hasCount_; + uint64_t trueCount_; public: BooleanColumnStatisticsImpl() { @@ -392,33 +392,33 @@ namespace orc { virtual ~BooleanColumnStatisticsImpl() override; bool hasCount() const override { - 
return _hasCount; + return hasCount_; } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); - _hasCount = true; + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); + hasCount_ = true; } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } uint64_t getFalseCount() const override { if (hasCount()) { - return getNumberOfValues() - _trueCount; + return getNumberOfValues() - trueCount_; } else { throw ParseError("False count is not defined."); } @@ -426,43 +426,43 @@ namespace orc { uint64_t getTrueCount() const override { if (hasCount()) { - return _trueCount; + return trueCount_; } else { throw ParseError("True count is not defined."); } } void setTrueCount(uint64_t trueCount) { - _hasCount = true; - _trueCount = trueCount; + hasCount_ = true; + trueCount_ = trueCount; } void update(bool value, size_t repetitions) { if (value) { - _trueCount += repetitions; + trueCount_ += repetitions; } } void merge(const MutableColumnStatistics& other) override { const BooleanColumnStatisticsImpl& boolStats = dynamic_cast(other); - _stats.merge(boolStats._stats); - _hasCount = _hasCount && boolStats._hasCount; - _trueCount += boolStats._trueCount; + stats_.merge(boolStats.stats_); + hasCount_ = hasCount_ && boolStats.hasCount_; + trueCount_ += boolStats.trueCount_; } void reset() override { - _stats.reset(); + stats_.reset(); setTrueCount(0); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + 
pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::BucketStatistics* bucketStats = pbStats.mutable_bucket_statistics(); - if (_hasCount) { - bucketStats->add_count(_trueCount); + if (hasCount_) { + bucketStats->add_count(trueCount_); } else { bucketStats->clear_count(); } @@ -485,7 +485,7 @@ namespace orc { class DateColumnStatisticsImpl : public DateColumnStatistics, public MutableColumnStatistics { private: - InternalDateStatistics _stats; + InternalDateStatistics stats_; public: DateColumnStatisticsImpl() { @@ -495,36 +495,36 @@ namespace orc { virtual ~DateColumnStatisticsImpl() override; bool hasMinimum() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximum() const override { - return _stats.hasMaximum(); + return stats_.hasMaximum(); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } int32_t getMinimum() const override { if (hasMinimum()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("Minimum is not defined."); } @@ -532,44 +532,44 @@ namespace orc { int32_t getMaximum() const override { if (hasMaximum()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("Maximum is not defined."); } } void setMinimum(int32_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); 
} void setMaximum(int32_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } void update(int32_t value) { - _stats.updateMinMax(value); + stats_.updateMinMax(value); } void merge(const MutableColumnStatistics& other) override { const DateColumnStatisticsImpl& dateStats = dynamic_cast(other); - _stats.merge(dateStats._stats); + stats_.merge(dateStats.stats_); } void reset() override { - _stats.reset(); + stats_.reset(); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::DateStatistics* dateStatistics = pbStats.mutable_date_statistics(); - if (_stats.hasMinimum()) { - dateStatistics->set_maximum(_stats.getMaximum()); - dateStatistics->set_minimum(_stats.getMinimum()); + if (stats_.hasMinimum()) { + dateStatistics->set_maximum(stats_.getMaximum()); + dateStatistics->set_minimum(stats_.getMinimum()); } else { dateStatistics->clear_minimum(); dateStatistics->clear_maximum(); @@ -599,7 +599,7 @@ namespace orc { class DecimalColumnStatisticsImpl : public DecimalColumnStatistics, public MutableColumnStatistics { private: - InternalDecimalStatistics _stats; + InternalDecimalStatistics stats_; public: DecimalColumnStatisticsImpl() { @@ -610,40 +610,40 @@ namespace orc { virtual ~DecimalColumnStatisticsImpl() override; bool hasMinimum() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximum() const override { - return _stats.hasMaximum(); + return stats_.hasMaximum(); } bool hasSum() const override { - return _stats.hasSum(); + return stats_.hasSum(); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } uint64_t 
getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } Decimal getMinimum() const override { if (hasMinimum()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("Minimum is not defined."); } @@ -651,39 +651,39 @@ namespace orc { Decimal getMaximum() const override { if (hasMaximum()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("Maximum is not defined."); } } void setMinimum(Decimal minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); } void setMaximum(Decimal maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } Decimal getSum() const override { if (hasSum()) { - return _stats.getSum(); + return stats_.getSum(); } else { throw ParseError("Sum is not defined."); } } void setSum(Decimal sum) { - _stats.setHasSum(true); - _stats.setSum(sum); + stats_.setHasSum(true); + stats_.setSum(sum); } void update(const Decimal& value) { - _stats.updateMinMax(value); + stats_.updateMinMax(value); - if (_stats.hasSum()) { + if (stats_.hasSum()) { updateSum(value); } } @@ -692,33 +692,33 @@ namespace orc { const DecimalColumnStatisticsImpl& decStats = dynamic_cast(other); - _stats.merge(decStats._stats); + stats_.merge(decStats.stats_); - _stats.setHasSum(_stats.hasSum() && decStats.hasSum()); - if (_stats.hasSum()) { + stats_.setHasSum(stats_.hasSum() && decStats.hasSum()); + if (stats_.hasSum()) { updateSum(decStats.getSum()); } } void reset() override { - _stats.reset(); + 
stats_.reset(); setSum(Decimal()); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::DecimalStatistics* decStats = pbStats.mutable_decimal_statistics(); - if (_stats.hasMinimum()) { - decStats->set_minimum(_stats.getMinimum().toString(true)); - decStats->set_maximum(_stats.getMaximum().toString(true)); + if (stats_.hasMinimum()) { + decStats->set_minimum(stats_.getMinimum().toString(true)); + decStats->set_maximum(stats_.getMaximum().toString(true)); } else { decStats->clear_minimum(); decStats->clear_maximum(); } - if (_stats.hasSum()) { - decStats->set_sum(_stats.getSum().toString(true)); + if (stats_.hasSum()) { + decStats->set_sum(stats_.getSum().toString(true)); } else { decStats->clear_sum(); } @@ -752,9 +752,9 @@ namespace orc { private: void updateSum(Decimal value) { - if (_stats.hasSum()) { + if (stats_.hasSum()) { bool overflow = false; - Decimal sum = _stats.getSum(); + Decimal sum = stats_.getSum(); if (sum.scale > value.scale) { value.value = scaleUpInt128ByPowerOfTen(value.value, sum.scale - value.scale, overflow); } else if (sum.scale < value.scale) { @@ -766,14 +766,14 @@ namespace orc { bool wasPositive = sum.value >= 0; sum.value += value.value; if ((value.value >= 0) == wasPositive) { - _stats.setHasSum((sum.value >= 0) == wasPositive); + stats_.setHasSum((sum.value >= 0) == wasPositive); } } else { - _stats.setHasSum(false); + stats_.setHasSum(false); } - if (_stats.hasSum()) { - _stats.setSum(sum); + if (stats_.hasSum()) { + stats_.setSum(sum); } } } @@ -781,7 +781,7 @@ namespace orc { class DoubleColumnStatisticsImpl : public DoubleColumnStatistics, public MutableColumnStatistics { private: - InternalDoubleStatistics _stats; + InternalDoubleStatistics stats_; public: DoubleColumnStatisticsImpl() { @@ -791,40 +791,40 
@@ namespace orc { virtual ~DoubleColumnStatisticsImpl() override; bool hasMinimum() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximum() const override { - return _stats.hasMaximum(); + return stats_.hasMaximum(); } bool hasSum() const override { - return _stats.hasSum(); + return stats_.hasSum(); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } double getMinimum() const override { if (hasMinimum()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("Minimum is not defined."); } @@ -832,70 +832,70 @@ namespace orc { double getMaximum() const override { if (hasMaximum()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("Maximum is not defined."); } } void setMinimum(double minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); } void setMaximum(double maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } double getSum() const override { if (hasSum()) { - return _stats.getSum(); + return stats_.getSum(); } else { throw ParseError("Sum is not defined."); } } void setSum(double sum) { - _stats.setHasSum(true); - _stats.setSum(sum); + stats_.setHasSum(true); + stats_.setSum(sum); } void update(double value) { - _stats.updateMinMax(value); - 
_stats.setSum(_stats.getSum() + value); + stats_.updateMinMax(value); + stats_.setSum(stats_.getSum() + value); } void merge(const MutableColumnStatistics& other) override { const DoubleColumnStatisticsImpl& doubleStats = dynamic_cast(other); - _stats.merge(doubleStats._stats); + stats_.merge(doubleStats.stats_); - _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum()); - if (_stats.hasSum()) { - _stats.setSum(_stats.getSum() + doubleStats.getSum()); + stats_.setHasSum(stats_.hasSum() && doubleStats.hasSum()); + if (stats_.hasSum()) { + stats_.setSum(stats_.getSum() + doubleStats.getSum()); } } void reset() override { - _stats.reset(); + stats_.reset(); setSum(0.0); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::DoubleStatistics* doubleStats = pbStats.mutable_double_statistics(); - if (_stats.hasMinimum()) { - doubleStats->set_minimum(_stats.getMinimum()); - doubleStats->set_maximum(_stats.getMaximum()); + if (stats_.hasMinimum()) { + doubleStats->set_minimum(stats_.getMinimum()); + doubleStats->set_maximum(stats_.getMaximum()); } else { doubleStats->clear_minimum(); doubleStats->clear_maximum(); } - if (_stats.hasSum()) { - doubleStats->set_sum(_stats.getSum()); + if (stats_.hasSum()) { + doubleStats->set_sum(stats_.getSum()); } else { doubleStats->clear_sum(); } @@ -930,7 +930,7 @@ namespace orc { class IntegerColumnStatisticsImpl : public IntegerColumnStatistics, public MutableColumnStatistics { private: - InternalIntegerStatistics _stats; + InternalIntegerStatistics stats_; public: IntegerColumnStatisticsImpl() { @@ -940,40 +940,40 @@ namespace orc { virtual ~IntegerColumnStatisticsImpl() override; bool hasMinimum() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximum() const override { - 
return _stats.hasMaximum(); + return stats_.hasMaximum(); } bool hasSum() const override { - return _stats.hasSum(); + return stats_.hasSum(); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } int64_t getMinimum() const override { if (hasMinimum()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("Minimum is not defined."); } @@ -981,48 +981,48 @@ namespace orc { int64_t getMaximum() const override { if (hasMaximum()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("Maximum is not defined."); } } void setMinimum(int64_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); } void setMaximum(int64_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } int64_t getSum() const override { if (hasSum()) { - return _stats.getSum(); + return stats_.getSum(); } else { throw ParseError("Sum is not defined."); } } void setSum(int64_t sum) { - _stats.setHasSum(true); - _stats.setSum(sum); + stats_.setHasSum(true); + stats_.setSum(sum); } void update(int64_t value, int repetitions) { - _stats.updateMinMax(value); + stats_.updateMinMax(value); - if (_stats.hasSum()) { + if (stats_.hasSum()) { if (repetitions > 1) { - _stats.setHasSum(multiplyExact(value, repetitions, &value)); + 
stats_.setHasSum(multiplyExact(value, repetitions, &value)); } - if (_stats.hasSum()) { - _stats.setHasSum(addExact(_stats.getSum(), value, &value)); + if (stats_.hasSum()) { + stats_.setHasSum(addExact(stats_.getSum(), value, &value)); - if (_stats.hasSum()) { - _stats.setSum(value); + if (stats_.hasSum()) { + stats_.setSum(value); } } } @@ -1032,38 +1032,38 @@ namespace orc { const IntegerColumnStatisticsImpl& intStats = dynamic_cast(other); - _stats.merge(intStats._stats); + stats_.merge(intStats.stats_); // update sum and check overflow - _stats.setHasSum(_stats.hasSum() && intStats.hasSum()); - if (_stats.hasSum()) { + stats_.setHasSum(stats_.hasSum() && intStats.hasSum()); + if (stats_.hasSum()) { int64_t value; - _stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value)); - if (_stats.hasSum()) { - _stats.setSum(value); + stats_.setHasSum(addExact(stats_.getSum(), intStats.getSum(), &value)); + if (stats_.hasSum()) { + stats_.setSum(value); } } } void reset() override { - _stats.reset(); + stats_.reset(); setSum(0); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::IntegerStatistics* intStats = pbStats.mutable_int_statistics(); - if (_stats.hasMinimum()) { - intStats->set_minimum(_stats.getMinimum()); - intStats->set_maximum(_stats.getMaximum()); + if (stats_.hasMinimum()) { + intStats->set_minimum(stats_.getMinimum()); + intStats->set_maximum(stats_.getMaximum()); } else { intStats->clear_minimum(); intStats->clear_maximum(); } - if (_stats.hasSum()) { - intStats->set_sum(_stats.getSum()); + if (stats_.hasSum()) { + intStats->set_sum(stats_.getSum()); } else { intStats->clear_sum(); } @@ -1097,7 +1097,7 @@ namespace orc { class StringColumnStatisticsImpl : public StringColumnStatistics, public MutableColumnStatistics { 
private: - InternalStringStatistics _stats; + InternalStringStatistics stats_; public: StringColumnStatisticsImpl() { @@ -1108,40 +1108,40 @@ namespace orc { virtual ~StringColumnStatisticsImpl() override; bool hasMinimum() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximum() const override { - return _stats.hasMaximum(); + return stats_.hasMaximum(); } bool hasTotalLength() const override { - return _stats.hasTotalLength(); + return stats_.hasTotalLength(); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } const std::string& getMinimum() const override { if (hasMinimum()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("Minimum is not defined."); } @@ -1149,59 +1149,59 @@ namespace orc { const std::string& getMaximum() const override { if (hasMaximum()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("Maximum is not defined."); } } void setMinimum(std::string minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); } void setMaximum(std::string maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } uint64_t getTotalLength() const override { if (hasTotalLength()) { - return _stats.getTotalLength(); + return stats_.getTotalLength(); } else { throw 
ParseError("Total length is not defined."); } } void setTotalLength(uint64_t length) { - _stats.setHasTotalLength(true); - _stats.setTotalLength(length); + stats_.setHasTotalLength(true); + stats_.setTotalLength(length); } void update(const char* value, size_t length) { if (value != nullptr) { - if (!_stats.hasMinimum()) { + if (!stats_.hasMinimum()) { std::string tempStr(value, value + length); setMinimum(tempStr); setMaximum(tempStr); } else { // update min - int minCmp = strncmp(_stats.getMinimum().c_str(), value, - std::min(_stats.getMinimum().length(), length)); - if (minCmp > 0 || (minCmp == 0 && length < _stats.getMinimum().length())) { + int minCmp = strncmp(stats_.getMinimum().c_str(), value, + std::min(stats_.getMinimum().length(), length)); + if (minCmp > 0 || (minCmp == 0 && length < stats_.getMinimum().length())) { setMinimum(std::string(value, value + length)); } // update max - int maxCmp = strncmp(_stats.getMaximum().c_str(), value, - std::min(_stats.getMaximum().length(), length)); - if (maxCmp < 0 || (maxCmp == 0 && length > _stats.getMaximum().length())) { + int maxCmp = strncmp(stats_.getMaximum().c_str(), value, + std::min(stats_.getMaximum().length(), length)); + if (maxCmp < 0 || (maxCmp == 0 && length > stats_.getMaximum().length())) { setMaximum(std::string(value, value + length)); } } } - _stats.setTotalLength(_stats.getTotalLength() + length); + stats_.setTotalLength(stats_.getTotalLength() + length); } void update(std::string value) { @@ -1211,28 +1211,28 @@ namespace orc { void merge(const MutableColumnStatistics& other) override { const StringColumnStatisticsImpl& strStats = dynamic_cast(other); - _stats.merge(strStats._stats); + stats_.merge(strStats.stats_); } void reset() override { - _stats.reset(); + stats_.reset(); setTotalLength(0); } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + 
pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::StringStatistics* strStats = pbStats.mutable_string_statistics(); - if (_stats.hasMinimum()) { - strStats->set_minimum(_stats.getMinimum()); - strStats->set_maximum(_stats.getMaximum()); + if (stats_.hasMinimum()) { + strStats->set_minimum(stats_.getMinimum()); + strStats->set_maximum(stats_.getMaximum()); } else { strStats->clear_minimum(); strStats->clear_maximum(); } - if (_stats.hasTotalLength()) { - strStats->set_sum(static_cast(_stats.getTotalLength())); + if (stats_.hasTotalLength()) { + strStats->set_sum(static_cast(stats_.getTotalLength())); } else { strStats->clear_sum(); } @@ -1267,13 +1267,13 @@ namespace orc { class TimestampColumnStatisticsImpl : public TimestampColumnStatistics, public MutableColumnStatistics { private: - InternalIntegerStatistics _stats; - bool _hasLowerBound; - bool _hasUpperBound; - int64_t _lowerBound; - int64_t _upperBound; - int32_t _minimumNanos; // last 6 digits of nanosecond of minimum timestamp - int32_t _maximumNanos; // last 6 digits of nanosecond of maximum timestamp + InternalIntegerStatistics stats_; + bool hasLowerBound_; + bool hasUpperBound_; + int64_t lowerBound_; + int64_t upperBound_; + int32_t minimumNanos_; // last 6 digits of nanosecond of minimum timestamp + int32_t maximumNanos_; // last 6 digits of nanosecond of maximum timestamp static constexpr int32_t DEFAULT_MIN_NANOS = 0; static constexpr int32_t DEFAULT_MAX_NANOS = 999999; @@ -1286,36 +1286,36 @@ namespace orc { virtual ~TimestampColumnStatisticsImpl() override; bool hasMinimum() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximum() const override { - return _stats.hasMaximum(); + return stats_.hasMaximum(); } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - 
_stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } int64_t getMinimum() const override { if (hasMinimum()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("Minimum is not defined."); } @@ -1323,46 +1323,46 @@ namespace orc { int64_t getMaximum() const override { if (hasMaximum()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("Maximum is not defined."); } } void setMinimum(int64_t minimum) { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); } void setMaximum(int64_t maximum) { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } void update(int64_t value) { - _stats.updateMinMax(value); + stats_.updateMinMax(value); } void update(int64_t milli, int32_t nano) { - if (!_stats.hasMinimum()) { - _stats.setHasMinimum(true); - _stats.setHasMaximum(true); - _stats.setMinimum(milli); - _stats.setMaximum(milli); - _maximumNanos = _minimumNanos = nano; + if (!stats_.hasMinimum()) { + stats_.setHasMinimum(true); + stats_.setHasMaximum(true); + stats_.setMinimum(milli); + stats_.setMaximum(milli); + maximumNanos_ = minimumNanos_ = nano; } else { - if (milli <= _stats.getMinimum()) { - if (milli < _stats.getMinimum() || nano < _minimumNanos) { - _minimumNanos = nano; + if (milli <= stats_.getMinimum()) { + if (milli < stats_.getMinimum() || nano < minimumNanos_) { + minimumNanos_ = nano; } - _stats.setMinimum(milli); + stats_.setMinimum(milli); } - if (milli >= _stats.getMaximum()) { - if 
(milli > _stats.getMaximum() || nano > _maximumNanos) { - _maximumNanos = nano; + if (milli >= stats_.getMaximum()) { + if (milli > stats_.getMaximum() || nano > maximumNanos_) { + maximumNanos_ = nano; } - _stats.setMaximum(milli); + stats_.setMaximum(milli); } } } @@ -1371,55 +1371,55 @@ namespace orc { const TimestampColumnStatisticsImpl& tsStats = dynamic_cast(other); - _stats.setHasNull(_stats.hasNull() || tsStats.hasNull()); - _stats.setNumberOfValues(_stats.getNumberOfValues() + tsStats.getNumberOfValues()); + stats_.setHasNull(stats_.hasNull() || tsStats.hasNull()); + stats_.setNumberOfValues(stats_.getNumberOfValues() + tsStats.getNumberOfValues()); if (tsStats.hasMinimum()) { - if (!_stats.hasMinimum()) { - _stats.setHasMinimum(true); - _stats.setHasMaximum(true); - _stats.setMinimum(tsStats.getMinimum()); - _stats.setMaximum(tsStats.getMaximum()); - _minimumNanos = tsStats.getMinimumNanos(); - _maximumNanos = tsStats.getMaximumNanos(); + if (!stats_.hasMinimum()) { + stats_.setHasMinimum(true); + stats_.setHasMaximum(true); + stats_.setMinimum(tsStats.getMinimum()); + stats_.setMaximum(tsStats.getMaximum()); + minimumNanos_ = tsStats.getMinimumNanos(); + maximumNanos_ = tsStats.getMaximumNanos(); } else { - if (tsStats.getMaximum() >= _stats.getMaximum()) { - if (tsStats.getMaximum() > _stats.getMaximum() || - tsStats.getMaximumNanos() > _maximumNanos) { - _maximumNanos = tsStats.getMaximumNanos(); + if (tsStats.getMaximum() >= stats_.getMaximum()) { + if (tsStats.getMaximum() > stats_.getMaximum() || + tsStats.getMaximumNanos() > maximumNanos_) { + maximumNanos_ = tsStats.getMaximumNanos(); } - _stats.setMaximum(tsStats.getMaximum()); + stats_.setMaximum(tsStats.getMaximum()); } - if (tsStats.getMinimum() <= _stats.getMinimum()) { - if (tsStats.getMinimum() < _stats.getMinimum() || - tsStats.getMinimumNanos() < _minimumNanos) { - _minimumNanos = tsStats.getMinimumNanos(); + if (tsStats.getMinimum() <= stats_.getMinimum()) { + if (tsStats.getMinimum() < 
stats_.getMinimum() || + tsStats.getMinimumNanos() < minimumNanos_) { + minimumNanos_ = tsStats.getMinimumNanos(); } - _stats.setMinimum(tsStats.getMinimum()); + stats_.setMinimum(tsStats.getMinimum()); } } } } void reset() override { - _stats.reset(); - _minimumNanos = DEFAULT_MIN_NANOS; - _maximumNanos = DEFAULT_MAX_NANOS; + stats_.reset(); + minimumNanos_ = DEFAULT_MIN_NANOS; + maximumNanos_ = DEFAULT_MAX_NANOS; } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::TimestampStatistics* tsStats = pbStats.mutable_timestamp_statistics(); - if (_stats.hasMinimum()) { - tsStats->set_minimum_utc(_stats.getMinimum()); - tsStats->set_maximum_utc(_stats.getMaximum()); - if (_minimumNanos != DEFAULT_MIN_NANOS) { - tsStats->set_minimum_nanos(_minimumNanos + 1); + if (stats_.hasMinimum()) { + tsStats->set_minimum_utc(stats_.getMinimum()); + tsStats->set_maximum_utc(stats_.getMaximum()); + if (minimumNanos_ != DEFAULT_MIN_NANOS) { + tsStats->set_minimum_nanos(minimumNanos_ + 1); } - if (_maximumNanos != DEFAULT_MAX_NANOS) { - tsStats->set_maximum_nanos(_maximumNanos + 1); + if (maximumNanos_ != DEFAULT_MAX_NANOS) { + tsStats->set_maximum_nanos(maximumNanos_ + 1); } } else { tsStats->clear_minimum_utc(); @@ -1478,16 +1478,16 @@ namespace orc { } bool hasLowerBound() const override { - return _hasLowerBound; + return hasLowerBound_; } bool hasUpperBound() const override { - return _hasUpperBound; + return hasUpperBound_; } int64_t getLowerBound() const override { if (hasLowerBound()) { - return _lowerBound; + return lowerBound_; } else { throw ParseError("LowerBound is not defined."); } @@ -1495,7 +1495,7 @@ namespace orc { int64_t getUpperBound() const override { if (hasUpperBound()) { - return _upperBound; + return upperBound_; } else { throw 
ParseError("UpperBound is not defined."); } @@ -1503,7 +1503,7 @@ namespace orc { int32_t getMinimumNanos() const override { if (hasMinimum()) { - return _minimumNanos; + return minimumNanos_; } else { throw ParseError("Minimum is not defined."); } @@ -1511,7 +1511,7 @@ namespace orc { int32_t getMaximumNanos() const override { if (hasMaximum()) { - return _maximumNanos; + return maximumNanos_; } else { throw ParseError("Maximum is not defined."); } @@ -1521,7 +1521,7 @@ namespace orc { class CollectionColumnStatisticsImpl : public CollectionColumnStatistics, public MutableColumnStatistics { private: - InternalCollectionStatistics _stats; + InternalCollectionStatistics stats_; public: CollectionColumnStatisticsImpl() { @@ -1531,40 +1531,40 @@ namespace orc { virtual ~CollectionColumnStatisticsImpl() override; bool hasMinimumChildren() const override { - return _stats.hasMinimum(); + return stats_.hasMinimum(); } bool hasMaximumChildren() const override { - return _stats.hasMaximum(); + return stats_.hasMaximum(); } bool hasTotalChildren() const override { - return _stats.hasSum(); + return stats_.hasSum(); } void increase(uint64_t count) override { - _stats.setNumberOfValues(_stats.getNumberOfValues() + count); + stats_.setNumberOfValues(stats_.getNumberOfValues() + count); } uint64_t getNumberOfValues() const override { - return _stats.getNumberOfValues(); + return stats_.getNumberOfValues(); } void setNumberOfValues(uint64_t value) override { - _stats.setNumberOfValues(value); + stats_.setNumberOfValues(value); } bool hasNull() const override { - return _stats.hasNull(); + return stats_.hasNull(); } void setHasNull(bool hasNull) override { - _stats.setHasNull(hasNull); + stats_.setHasNull(hasNull); } uint64_t getMinimumChildren() const override { if (hasMinimumChildren()) { - return _stats.getMinimum(); + return stats_.getMinimum(); } else { throw ParseError("MinimumChildren is not defined."); } @@ -1572,7 +1572,7 @@ namespace orc { uint64_t getMaximumChildren() 
const override { if (hasMaximumChildren()) { - return _stats.getMaximum(); + return stats_.getMaximum(); } else { throw ParseError("MaximumChildren is not defined."); } @@ -1580,78 +1580,78 @@ namespace orc { uint64_t getTotalChildren() const override { if (hasTotalChildren()) { - return _stats.getSum(); + return stats_.getSum(); } else { throw ParseError("TotalChildren is not defined."); } } void setMinimumChildren(uint64_t minimum) override { - _stats.setHasMinimum(true); - _stats.setMinimum(minimum); + stats_.setHasMinimum(true); + stats_.setMinimum(minimum); } void setMaximumChildren(uint64_t maximum) override { - _stats.setHasMaximum(true); - _stats.setMaximum(maximum); + stats_.setHasMaximum(true); + stats_.setMaximum(maximum); } void setTotalChildren(uint64_t sum) override { - _stats.setHasSum(true); - _stats.setSum(sum); + stats_.setHasSum(true); + stats_.setSum(sum); } void setHasTotalChildren(bool hasSum) override { - _stats.setHasSum(hasSum); + stats_.setHasSum(hasSum); } void merge(const MutableColumnStatistics& other) override { const CollectionColumnStatisticsImpl& collectionStats = dynamic_cast(other); - _stats.merge(collectionStats._stats); + stats_.merge(collectionStats.stats_); // hasSumValue here means no overflow - _stats.setHasSum(_stats.hasSum() && collectionStats.hasTotalChildren()); - if (_stats.hasSum()) { - uint64_t oldSum = _stats.getSum(); - _stats.setSum(_stats.getSum() + collectionStats.getTotalChildren()); - if (oldSum > _stats.getSum()) { - _stats.setHasSum(false); + stats_.setHasSum(stats_.hasSum() && collectionStats.hasTotalChildren()); + if (stats_.hasSum()) { + uint64_t oldSum = stats_.getSum(); + stats_.setSum(stats_.getSum() + collectionStats.getTotalChildren()); + if (oldSum > stats_.getSum()) { + stats_.setHasSum(false); } } } void reset() override { - _stats.reset(); + stats_.reset(); setTotalChildren(0); } void update(uint64_t value) { - _stats.updateMinMax(value); - if (_stats.hasSum()) { - uint64_t oldSum = 
_stats.getSum(); - _stats.setSum(_stats.getSum() + value); - if (oldSum > _stats.getSum()) { - _stats.setHasSum(false); + stats_.updateMinMax(value); + if (stats_.hasSum()) { + uint64_t oldSum = stats_.getSum(); + stats_.setSum(stats_.getSum() + value); + if (oldSum > stats_.getSum()) { + stats_.setHasSum(false); } } } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_has_null(_stats.hasNull()); - pbStats.set_number_of_values(_stats.getNumberOfValues()); + pbStats.set_has_null(stats_.hasNull()); + pbStats.set_number_of_values(stats_.getNumberOfValues()); proto::CollectionStatistics* collectionStats = pbStats.mutable_collection_statistics(); - if (_stats.hasMinimum()) { - collectionStats->set_min_children(_stats.getMinimum()); - collectionStats->set_max_children(_stats.getMaximum()); + if (stats_.hasMinimum()) { + collectionStats->set_min_children(stats_.getMinimum()); + collectionStats->set_max_children(stats_.getMaximum()); } else { collectionStats->clear_min_children(); collectionStats->clear_max_children(); } - if (_stats.hasSum()) { - collectionStats->set_total_children(_stats.getSum()); + if (stats_.hasSum()) { + collectionStats->set_total_children(stats_.getSum()); } else { collectionStats->clear_total_children(); } @@ -1688,7 +1688,7 @@ namespace orc { class StatisticsImpl : public Statistics { private: - std::vector colStats; + std::vector colStats_; // DELIBERATELY NOT IMPLEMENTED StatisticsImpl(const StatisticsImpl&); @@ -1700,20 +1700,20 @@ namespace orc { StatisticsImpl(const proto::Footer& footer, const StatContext& statContext); virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override { - return colStats[columnId]; + return colStats_[columnId]; } virtual ~StatisticsImpl() override; uint32_t getNumberOfColumns() const override { - return static_cast(colStats.size()); + return static_cast(colStats_.size()); } }; class StripeStatisticsImpl : public StripeStatistics { private: - std::unique_ptr 
columnStats; - std::vector > > rowIndexStats; + std::unique_ptr columnStats_; + std::vector > > rowIndexStats_; // DELIBERATELY NOT IMPLEMENTED StripeStatisticsImpl(const StripeStatisticsImpl&); @@ -1725,23 +1725,23 @@ namespace orc { const StatContext& statContext); virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override { - return columnStats->getColumnStatistics(columnId); + return columnStats_->getColumnStatistics(columnId); } uint32_t getNumberOfColumns() const override { - return columnStats->getNumberOfColumns(); + return columnStats_->getNumberOfColumns(); } virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, uint32_t rowIndex) const override { // check id indices are valid - return rowIndexStats[columnId][rowIndex].get(); + return rowIndexStats_[columnId][rowIndex].get(); } virtual ~StripeStatisticsImpl() override; uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { - return static_cast(rowIndexStats[columnId].size()); + return static_cast(rowIndexStats_[columnId].size()); } }; diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc index 8507e957670..832549e99e5 100644 --- a/c++/src/StripeStream.cc +++ b/c++/src/StripeStream.cc @@ -25,19 +25,19 @@ namespace orc { - StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& _reader, uint64_t _index, - const proto::StripeInformation& _stripeInfo, - const proto::StripeFooter& _footer, uint64_t _stripeStart, - InputStream& _input, const Timezone& _writerTimezone, - const Timezone& _readerTimezone) - : reader(_reader), - stripeInfo(_stripeInfo), - footer(_footer), - stripeIndex(_index), - stripeStart(_stripeStart), - input(_input), - writerTimezone(_writerTimezone), - readerTimezone(_readerTimezone) { + StripeStreamsImpl::StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index, + const proto::StripeInformation& stripeInfo, + const proto::StripeFooter& footer, uint64_t stripeStart, + InputStream& input, const Timezone& 
writerTimezone, + const Timezone& readerTimezone) + : reader_(reader), + stripeInfo_(stripeInfo), + footer_(footer), + stripeIndex_(index), + stripeStart_(stripeStart), + input_(input), + writerTimezone_(writerTimezone), + readerTimezone_(readerTimezone) { // PASS } @@ -58,51 +58,51 @@ namespace orc { } const std::vector StripeStreamsImpl::getSelectedColumns() const { - return reader.getSelectedColumns(); + return reader_.getSelectedColumns(); } proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId) const { - return footer.columns(static_cast(columnId)); + return footer_.columns(static_cast(columnId)); } const Timezone& StripeStreamsImpl::getWriterTimezone() const { - return writerTimezone; + return writerTimezone_; } const Timezone& StripeStreamsImpl::getReaderTimezone() const { - return readerTimezone; + return readerTimezone_; } std::ostream* StripeStreamsImpl::getErrorStream() const { - return reader.getFileContents().errorStream; + return reader_.getFileContents().errorStream; } std::unique_ptr StripeStreamsImpl::getStream(uint64_t columnId, proto::Stream_Kind kind, bool shouldStream) const { - uint64_t offset = stripeStart; - uint64_t dataEnd = stripeInfo.offset() + stripeInfo.index_length() + stripeInfo.data_length(); - MemoryPool* pool = reader.getFileContents().pool; - for (int i = 0; i < footer.streams_size(); ++i) { - const proto::Stream& stream = footer.streams(i); + uint64_t offset = stripeStart_; + uint64_t dataEnd = stripeInfo_.offset() + stripeInfo_.index_length() + stripeInfo_.data_length(); + MemoryPool* pool = reader_.getFileContents().pool; + for (int i = 0; i < footer_.streams_size(); ++i) { + const proto::Stream& stream = footer_.streams(i); if (stream.has_kind() && stream.kind() == kind && stream.column() == static_cast(columnId)) { uint64_t streamLength = stream.length(); - uint64_t myBlock = shouldStream ? input.getNaturalReadSize() : streamLength; + uint64_t myBlock = shouldStream ? 
input_.getNaturalReadSize() : streamLength; if (offset + streamLength > dataEnd) { std::stringstream msg; - msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex + msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex_ << ": streamOffset=" << offset << ", streamLength=" << streamLength - << ", stripeOffset=" << stripeInfo.offset() - << ", stripeIndexLength=" << stripeInfo.index_length() - << ", stripeDataLength=" << stripeInfo.data_length(); + << ", stripeOffset=" << stripeInfo_.offset() + << ", stripeIndexLength=" << stripeInfo_.index_length() + << ", stripeDataLength=" << stripeInfo_.data_length(); throw ParseError(msg.str()); } - return createDecompressor(reader.getCompression(), + return createDecompressor(reader_.getCompression(), std::make_unique( - &input, offset, stream.length(), *pool, myBlock), - reader.getCompressionSize(), *pool, - reader.getFileContents().readerMetrics); + &input_, offset, stream.length(), *pool, myBlock), + reader_.getCompressionSize(), *pool, + reader_.getFileContents().readerMetrics); } offset += stream.length(); } @@ -110,38 +110,38 @@ namespace orc { } MemoryPool& StripeStreamsImpl::getMemoryPool() const { - return *reader.getFileContents().pool; + return *reader_.getFileContents().pool; } ReaderMetrics* StripeStreamsImpl::getReaderMetrics() const { - return reader.getFileContents().readerMetrics; + return reader_.getFileContents().readerMetrics; } bool StripeStreamsImpl::getThrowOnHive11DecimalOverflow() const { - return reader.getThrowOnHive11DecimalOverflow(); + return reader_.getThrowOnHive11DecimalOverflow(); } bool StripeStreamsImpl::isDecimalAsLong() const { - return reader.getIsDecimalAsLong(); + return reader_.getIsDecimalAsLong(); } int32_t StripeStreamsImpl::getForcedScaleOnHive11Decimal() const { - return reader.getForcedScaleOnHive11Decimal(); + return reader_.getForcedScaleOnHive11Decimal(); } const SchemaEvolution* StripeStreamsImpl::getSchemaEvolution() 
const { - return reader.getSchemaEvolution(); + return reader_.getSchemaEvolution(); } void StripeInformationImpl::ensureStripeFooterLoaded() const { - if (stripeFooter.get() == nullptr) { + if (stripeFooter_.get() == nullptr) { std::unique_ptr pbStream = - createDecompressor(compression, + createDecompressor(compression_, std::make_unique( - stream, offset + indexLength + dataLength, footerLength, memory), - blockSize, memory, metrics); - stripeFooter = std::make_unique(); - if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) { + stream_, offset_ + indexLength_ + dataLength_, footerLength_, memory_), + blockSize_, memory_, metrics_); + stripeFooter_ = std::make_unique(); + if (!stripeFooter_->ParseFromZeroCopyStream(pbStream.get())) { throw ParseError("Failed to parse the stripe footer"); } } @@ -150,12 +150,12 @@ namespace orc { std::unique_ptr StripeInformationImpl::getStreamInformation( uint64_t streamId) const { ensureStripeFooterLoaded(); - uint64_t streamOffset = offset; + uint64_t streamOffset = offset_; for (uint64_t s = 0; s < streamId; ++s) { - streamOffset += stripeFooter->streams(static_cast(s)).length(); + streamOffset += stripeFooter_->streams(static_cast(s)).length(); } return std::make_unique( - streamOffset, stripeFooter->streams(static_cast(streamId))); + streamOffset, stripeFooter_->streams(static_cast(streamId))); } } // namespace orc diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh index eae6ce0c314..e27eaab2f63 100644 --- a/c++/src/StripeStream.hh +++ b/c++/src/StripeStream.hh @@ -37,14 +37,14 @@ namespace orc { class StripeStreamsImpl : public StripeStreams { private: - const RowReaderImpl& reader; - const proto::StripeInformation& stripeInfo; - const proto::StripeFooter& footer; - const uint64_t stripeIndex; - const uint64_t stripeStart; - InputStream& input; - const Timezone& writerTimezone; - const Timezone& readerTimezone; + const RowReaderImpl& reader_; + const proto::StripeInformation& stripeInfo_; + const 
proto::StripeFooter& footer_; + const uint64_t stripeIndex_; + const uint64_t stripeStart_; + InputStream& input_; + const Timezone& writerTimezone_; + const Timezone& readerTimezone_; public: StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index, @@ -87,36 +87,36 @@ namespace orc { class StreamInformationImpl : public StreamInformation { private: - StreamKind kind; - uint64_t column; - uint64_t offset; - uint64_t length; + StreamKind kind_; + uint64_t column_; + uint64_t offset_; + uint64_t length_; public: - StreamInformationImpl(uint64_t _offset, const proto::Stream& stream) - : kind(static_cast(stream.kind())), - column(stream.column()), - offset(_offset), - length(stream.length()) { + StreamInformationImpl(uint64_t offset, const proto::Stream& stream) + : kind_(static_cast(stream.kind())), + column_(stream.column()), + offset_(offset), + length_(stream.length()) { // PASS } ~StreamInformationImpl() override; StreamKind getKind() const override { - return kind; + return kind_; } uint64_t getColumnId() const override { - return column; + return column_; } uint64_t getOffset() const override { - return offset; + return offset_; } uint64_t getLength() const override { - return length; + return length_; } }; @@ -125,34 +125,34 @@ namespace orc { */ class StripeInformationImpl : public StripeInformation { - uint64_t offset; - uint64_t indexLength; - uint64_t dataLength; - uint64_t footerLength; - uint64_t numRows; - InputStream* stream; - MemoryPool& memory; - CompressionKind compression; - uint64_t blockSize; - mutable std::unique_ptr stripeFooter; - ReaderMetrics* metrics; + uint64_t offset_; + uint64_t indexLength_; + uint64_t dataLength_; + uint64_t footerLength_; + uint64_t numRows_; + InputStream* stream_; + MemoryPool& memory_; + CompressionKind compression_; + uint64_t blockSize_; + mutable std::unique_ptr stripeFooter_; + ReaderMetrics* metrics_; void ensureStripeFooterLoaded() const; public: - StripeInformationImpl(uint64_t _offset, uint64_t 
_indexLength, uint64_t _dataLength, - uint64_t _footerLength, uint64_t _numRows, InputStream* _stream, - MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize, - ReaderMetrics* _metrics) - : offset(_offset), - indexLength(_indexLength), - dataLength(_dataLength), - footerLength(_footerLength), - numRows(_numRows), - stream(_stream), - memory(_memory), - compression(_compression), - blockSize(_blockSize), - metrics(_metrics) { + StripeInformationImpl(uint64_t offset, uint64_t indexLength, uint64_t dataLength, + uint64_t footerLength, uint64_t numRows, InputStream* stream, + MemoryPool& memory, CompressionKind compression, uint64_t blockSize, + ReaderMetrics* metrics) + : offset_(offset), + indexLength_(indexLength), + dataLength_(dataLength), + footerLength_(footerLength), + numRows_(numRows), + stream_(stream), + memory_(memory), + compression_(compression), + blockSize_(blockSize), + metrics_(metrics) { // PASS } @@ -161,49 +161,49 @@ namespace orc { } uint64_t getOffset() const override { - return offset; + return offset_; } uint64_t getLength() const override { - return indexLength + dataLength + footerLength; + return indexLength_ + dataLength_ + footerLength_; } uint64_t getIndexLength() const override { - return indexLength; + return indexLength_; } uint64_t getDataLength() const override { - return dataLength; + return dataLength_; } uint64_t getFooterLength() const override { - return footerLength; + return footerLength_; } uint64_t getNumberOfRows() const override { - return numRows; + return numRows_; } uint64_t getNumberOfStreams() const override { ensureStripeFooterLoaded(); - return static_cast(stripeFooter->streams_size()); + return static_cast(stripeFooter_->streams_size()); } std::unique_ptr getStreamInformation(uint64_t streamId) const override; ColumnEncodingKind getColumnEncoding(uint64_t colId) const override { ensureStripeFooterLoaded(); - return static_cast(stripeFooter->columns(static_cast(colId)).kind()); + return 
static_cast(stripeFooter_->columns(static_cast(colId)).kind()); } uint64_t getDictionarySize(uint64_t colId) const override { ensureStripeFooterLoaded(); return static_cast( - stripeFooter->columns(static_cast(colId)).dictionary_size()); + stripeFooter_->columns(static_cast(colId)).dictionary_size()); } const std::string& getWriterTimezone() const override { ensureStripeFooterLoaded(); - return stripeFooter->writer_timezone(); + return stripeFooter_->writer_timezone(); } }; diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc index 27e14480d50..fbad35fbcfd 100644 --- a/c++/src/Timezone.cc +++ b/c++/src/Timezone.cc @@ -184,49 +184,49 @@ namespace orc { * day = J||M.. */ class FutureRuleImpl : public FutureRule { - std::string ruleString; - TimezoneVariant standard; - bool hasDst; - TimezoneVariant dst; - Transition start; - Transition end; + std::string ruleString_; + TimezoneVariant standard_; + bool hasDst_; + TimezoneVariant dst_; + Transition start_; + Transition end_; // expanded time_t offsets of transitions - std::vector offsets; + std::vector offsets_; // Is the epoch (1 Jan 1970 00:00) in standard time? // This code assumes that the transition dates fall in the same order // each year. Hopefully no timezone regions decide to move across the // equator, which is about what it would take. - bool startInStd; + bool startInStd_; void computeOffsets() { - if (!hasDst) { - startInStd = true; - offsets.resize(1); + if (!hasDst_) { + startInStd_ = true; + offsets_.resize(1); } else { // Insert a transition for the epoch and two per a year for the next // 400 years. We assume that the all even positions are in standard // time if and only if startInStd and the odd ones are the reverse. 
- offsets.resize(400 * 2 + 1); - startInStd = start.getTime(1970) < end.getTime(1970); + offsets_.resize(400 * 2 + 1); + startInStd_ = start_.getTime(1970) < end_.getTime(1970); int64_t base = 0; for (int64_t year = 1970; year < 1970 + 400; ++year) { - if (startInStd) { - offsets[static_cast(year - 1970) * 2 + 1] = - base + start.getTime(year) - standard.gmtOffset; - offsets[static_cast(year - 1970) * 2 + 2] = - base + end.getTime(year) - dst.gmtOffset; + if (startInStd_) { + offsets_[static_cast(year - 1970) * 2 + 1] = + base + start_.getTime(year) - standard_.gmtOffset; + offsets_[static_cast(year - 1970) * 2 + 2] = + base + end_.getTime(year) - dst_.gmtOffset; } else { - offsets[static_cast(year - 1970) * 2 + 1] = - base + end.getTime(year) - dst.gmtOffset; - offsets[static_cast(year - 1970) * 2 + 2] = - base + start.getTime(year) - standard.gmtOffset; + offsets_[static_cast(year - 1970) * 2 + 1] = + base + end_.getTime(year) - dst_.gmtOffset; + offsets_[static_cast(year - 1970) * 2 + 2] = + base + start_.getTime(year) - standard_.gmtOffset; } base += (isLeap(year) ? 
366 : 365) * SECONDS_PER_DAY; } } - offsets[0] = 0; + offsets_[0] = 0; } public: @@ -247,34 +247,34 @@ namespace orc { } bool FutureRuleImpl::isDefined() const { - return ruleString.size() > 0; + return ruleString_.size() > 0; } const TimezoneVariant& FutureRuleImpl::getVariant(int64_t clk) const { - if (!hasDst) { - return standard; + if (!hasDst_) { + return standard_; } else { int64_t adjusted = clk % SECONDS_PER_400_YEARS; if (adjusted < 0) { adjusted += SECONDS_PER_400_YEARS; } - int64_t idx = binarySearch(offsets, adjusted); - if (startInStd == (idx % 2 == 0)) { - return standard; + int64_t idx = binarySearch(offsets_, adjusted); + if (startInStd_ == (idx % 2 == 0)) { + return standard_; } else { - return dst; + return dst_; } } } void FutureRuleImpl::print(std::ostream& out) const { if (isDefined()) { - out << " Future rule: " << ruleString << "\n"; - out << " standard " << standard.toString() << "\n"; - if (hasDst) { - out << " dst " << dst.toString() << "\n"; - out << " start " << start.toString() << "\n"; - out << " end " << end.toString() << "\n"; + out << " Future rule: " << ruleString_ << "\n"; + out << " standard " << standard_.toString() << "\n"; + if (hasDst_) { + out << " dst " << dst_.toString() << "\n"; + out << " start " << start_.toString() << "\n"; + out << " end " << end_.toString() << "\n"; } } } @@ -285,40 +285,40 @@ namespace orc { class FutureRuleParser { public: FutureRuleParser(const std::string& str, FutureRuleImpl* rule) - : ruleString(str), length(str.size()), position(0), output(*rule) { - output.ruleString = str; - if (position != length) { - parseName(output.standard.name); - output.standard.gmtOffset = -parseOffset(); - output.standard.isDst = false; - output.hasDst = position < length; - if (output.hasDst) { - parseName(output.dst.name); - output.dst.isDst = true; - if (ruleString[position] != ',') { - output.dst.gmtOffset = -parseOffset(); + : ruleString_(str), length_(str.size()), position_(0), output_(*rule) { + 
output_.ruleString_ = str; + if (position_ != length_) { + parseName(output_.standard_.name); + output_.standard_.gmtOffset = -parseOffset(); + output_.standard_.isDst = false; + output_.hasDst_ = position_ < length_; + if (output_.hasDst_) { + parseName(output_.dst_.name); + output_.dst_.isDst = true; + if (ruleString_[position_] != ',') { + output_.dst_.gmtOffset = -parseOffset(); } else { - output.dst.gmtOffset = output.standard.gmtOffset + 60 * 60; + output_.dst_.gmtOffset = output_.standard_.gmtOffset + 60 * 60; } - parseTransition(output.start); - parseTransition(output.end); + parseTransition(output_.start_); + parseTransition(output_.end_); } - if (position != length) { + if (position_ != length_) { throwError("Extra text"); } - output.computeOffsets(); + output_.computeOffsets(); } } private: - const std::string& ruleString; - size_t length; - size_t position; - FutureRuleImpl& output; + const std::string& ruleString_; + size_t length_; + size_t position_; + FutureRuleImpl& output_; void throwError(const char* msg) { std::stringstream buffer; - buffer << msg << " at " << position << " in '" << ruleString << "'"; + buffer << msg << " at " << position_ << " in '" << ruleString_ << "'"; throw TimezoneError(buffer.str()); } @@ -328,46 +328,46 @@ namespace orc { * and set the output string. 
*/ void parseName(std::string& result) { - if (position == length) { + if (position_ == length_) { throwError("name required"); } - size_t start = position; - if (ruleString[position] == '<') { - while (position < length && ruleString[position] != '>') { - position += 1; + size_t start = position_; + if (ruleString_[position_] == '<') { + while (position_ < length_ && ruleString_[position_] != '>') { + position_ += 1; } - if (position == length) { + if (position_ == length_) { throwError("missing close '>'"); } - position += 1; + position_ += 1; } else { - while (position < length) { - char ch = ruleString[position]; + while (position_ < length_) { + char ch = ruleString_[position_]; if (isdigit(ch) || ch == '-' || ch == '+' || ch == ',') { break; } - position += 1; + position_ += 1; } } - if (position == start) { + if (position_ == start) { throwError("empty string not allowed"); } - result = ruleString.substr(start, position - start); + result = ruleString_.substr(start, position_ - start); } /** * Parse an integer of the form [0-9]+ and return it. 
*/ int64_t parseNumber() { - if (position >= length) { + if (position_ >= length_) { throwError("missing number"); } int64_t result = 0; - while (position < length) { - char ch = ruleString[position]; + while (position_ < length_) { + char ch = ruleString_[position_]; if (isdigit(ch)) { result = result * 10 + (ch - '0'); - position += 1; + position_ += 1; } else { break; } @@ -383,17 +383,17 @@ namespace orc { int64_t parseOffset() { int64_t scale = 3600; bool isNegative = false; - if (position < length) { - char ch = ruleString[position]; + if (position_ < length_) { + char ch = ruleString_[position_]; isNegative = ch == '-'; if (ch == '-' || ch == '+') { - position += 1; + position_ += 1; } } int64_t result = parseNumber() * scale; - while (position < length && scale > 1 && ruleString[position] == ':') { + while (position_ < length_ && scale > 1 && ruleString_[position_] == ':') { scale /= 60; - position += 1; + position_ += 1; result += parseNumber() * scale; } if (isNegative) { @@ -407,35 +407,35 @@ namespace orc { * ,(J||M..)(/)? 
*/ void parseTransition(Transition& transition) { - if (length - position < 2 || ruleString[position] != ',') { + if (length_ - position_ < 2 || ruleString_[position_] != ',') { throwError("missing transition"); } - position += 1; - char ch = ruleString[position]; + position_ += 1; + char ch = ruleString_[position_]; if (ch == 'J') { transition.kind = TRANSITION_JULIAN; - position += 1; + position_ += 1; transition.day = parseNumber(); } else if (ch == 'M') { transition.kind = TRANSITION_MONTH; - position += 1; + position_ += 1; transition.month = parseNumber(); - if (position == length || ruleString[position] != '.') { + if (position_ == length_ || ruleString_[position_] != '.') { throwError("missing first ."); } - position += 1; + position_ += 1; transition.week = parseNumber(); - if (position == length || ruleString[position] != '.') { + if (position_ == length_ || ruleString_[position_] != '.') { throwError("missing second ."); } - position += 1; + position_ += 1; transition.day = parseNumber(); } else { transition.kind = TRANSITION_DAY; transition.day = parseNumber(); } - if (position < length && ruleString[position] == '/') { - position += 1; + if (position_ < length_ && ruleString_[position_] == '/') { + position_ += 1; transition.time = parseOffset(); } else { transition.time = 2 * 60 * 60; @@ -565,7 +565,7 @@ namespace orc { class TimezoneImpl : public Timezone { public: - TimezoneImpl(const std::string& _filename, const std::vector& buffer); + TimezoneImpl(const std::string& filename, const std::vector& buffer); virtual ~TimezoneImpl() override; /** @@ -576,11 +576,11 @@ namespace orc { void print(std::ostream&) const override; uint64_t getVersion() const override { - return version; + return version_; } int64_t getEpoch() const override { - return epoch; + return epoch_; } int64_t convertToUTC(int64_t clk) const override { @@ -599,31 +599,31 @@ namespace orc { void parseZoneFile(const unsigned char* ptr, uint64_t sectionOffset, uint64_t fileLength, const 
VersionParser& version); // filename - std::string filename; + std::string filename_; // the version of the file - uint64_t version; + uint64_t version_; // the list of variants for this timezone - std::vector variants; + std::vector variants_; // the list of the times where the local rules change - std::vector transitions; + std::vector transitions_; // the variant that starts at this transition. - std::vector currentVariant; + std::vector currentVariant_; // the variant before the first transition - uint64_t ancientVariant; + uint64_t ancientVariant_; // the rule for future times - std::shared_ptr futureRule; + std::shared_ptr futureRule_; // the last explicit transition after which we use the future rule - int64_t lastTransition; + int64_t lastTransition_; // The ORC epoch time in this timezone. - int64_t epoch; + int64_t epoch_; }; DIAGNOSTIC_PUSH @@ -639,8 +639,8 @@ namespace orc { // PASS } - TimezoneImpl::TimezoneImpl(const std::string& _filename, const std::vector& buffer) - : filename(_filename) { + TimezoneImpl::TimezoneImpl(const std::string& filename, const std::vector& buffer) + : filename_(filename) { parseZoneFile(&buffer[0], 0, buffer.size(), Version1Parser()); // Build the literal for the ORC epoch // 2015 Jan 1 00:00:00 @@ -653,7 +653,7 @@ namespace orc { epochStruct.tm_year = 2015 - 1900; epochStruct.tm_isdst = 0; time_t utcEpoch = timegm(&epochStruct); - epoch = utcEpoch - getVariant(utcEpoch).gmtOffset; + epoch_ = utcEpoch - getVariant(utcEpoch).gmtOffset; } const char* getTimezoneDirectory() { @@ -731,9 +731,9 @@ namespace orc { uint64_t variantCount, uint64_t nameOffset, uint64_t nameCount) { for (uint64_t variant = 0; variant < variantCount; ++variant) { - variants[variant].gmtOffset = + variants_[variant].gmtOffset = static_cast(decode32(ptr + variantOffset + 6 * variant)); - variants[variant].isDst = ptr[variantOffset + 6 * variant + 4] != 0; + variants_[variant].isDst = ptr[variantOffset + 6 * variant + 4] != 0; uint64_t nameStart = 
ptr[variantOffset + 6 * variant + 5]; if (nameStart >= nameCount) { std::stringstream buffer; @@ -741,7 +741,7 @@ namespace orc { << " >= " << nameCount; throw TimezoneError(buffer.str()); } - variants[variant].name = + variants_[variant].name = std::string(reinterpret_cast(ptr) + nameOffset + nameStart); } } @@ -781,7 +781,7 @@ namespace orc { if (fileLength < headerOffset + 6 * 4 || strncmp(reinterpret_cast(ptr) + magicOffset, "TZif", 4) != 0) { std::stringstream buffer; - buffer << "non-tzfile " << filename; + buffer << "non-tzfile " << filename_; throw TimezoneError(buffer.str()); } @@ -802,7 +802,7 @@ namespace orc { if (sectionLength > fileLength) { std::stringstream buffer; - buffer << "tzfile too short " << filename << " needs " << sectionLength << " and has " + buffer << "tzfile too short " << filename_ << " needs " << sectionLength << " and has " << fileLength; throw TimezoneError(buffer.str()); } @@ -812,82 +812,82 @@ namespace orc { parseZoneFile(ptr, sectionLength, fileLength, Version2Parser()); return; } - version = versionParser.getVersion(); - variants.resize(variantCount); - transitions.resize(timeCount); - currentVariant.resize(timeCount); + version_ = versionParser.getVersion(); + variants_.resize(variantCount); + transitions_.resize(timeCount); + currentVariant_.resize(timeCount); parseTimeVariants(ptr, variantOffset, variantCount, nameOffset, nameCount); bool foundAncient = false; for (uint64_t t = 0; t < timeCount; ++t) { - transitions[t] = versionParser.parseTime(ptr + timeOffset + t * versionParser.getTimeSize()); - currentVariant[t] = ptr[timeVariantOffset + t]; - if (currentVariant[t] >= variantCount) { + transitions_[t] = versionParser.parseTime(ptr + timeOffset + t * versionParser.getTimeSize()); + currentVariant_[t] = ptr[timeVariantOffset + t]; + if (currentVariant_[t] >= variantCount) { std::stringstream buffer; - buffer << "tzfile rule out of range " << filename << " references rule " - << currentVariant[t] << " of " << variantCount; 
+ buffer << "tzfile rule out of range " << filename_ << " references rule " + << currentVariant_[t] << " of " << variantCount; throw TimezoneError(buffer.str()); } // find the oldest standard time and use that as the ancient value - if (!foundAncient && !variants[currentVariant[t]].isDst) { + if (!foundAncient && !variants_[currentVariant_[t]].isDst) { foundAncient = true; - ancientVariant = currentVariant[t]; + ancientVariant_ = currentVariant_[t]; } } if (!foundAncient) { - ancientVariant = 0; + ancientVariant_ = 0; } - futureRule = parseFutureRule( + futureRule_ = parseFutureRule( versionParser.parseFutureString(ptr, sectionLength, fileLength - sectionLength)); // find the lower bound for applying the future rule - if (futureRule->isDefined()) { + if (futureRule_->isDefined()) { if (timeCount > 0) { - lastTransition = transitions[timeCount - 1]; + lastTransition_ = transitions_[timeCount - 1]; } else { - lastTransition = INT64_MIN; + lastTransition_ = INT64_MIN; } } else { - lastTransition = INT64_MAX; + lastTransition_ = INT64_MAX; } } const TimezoneVariant& TimezoneImpl::getVariant(int64_t clk) const { // if it is after the last explicit entry in the table, // use the future rule to get an answer - if (clk > lastTransition) { - return futureRule->getVariant(clk); + if (clk > lastTransition_) { + return futureRule_->getVariant(clk); } else { - int64_t transition = binarySearch(transitions, clk); + int64_t transition = binarySearch(transitions_, clk); uint64_t idx; if (transition < 0) { - idx = ancientVariant; + idx = ancientVariant_; } else { - idx = currentVariant[static_cast(transition)]; + idx = currentVariant_[static_cast(transition)]; } - return variants[idx]; + return variants_[idx]; } } void TimezoneImpl::print(std::ostream& out) const { - out << "Timezone file: " << filename << "\n"; - out << " Version: " << version << "\n"; - futureRule->print(out); - for (uint64_t r = 0; r < variants.size(); ++r) { - out << " Variant " << r << ": " << 
variants[r].toString() << "\n"; + out << "Timezone file: " << filename_ << "\n"; + out << " Version: " << version_ << "\n"; + futureRule_->print(out); + for (uint64_t r = 0; r < variants_.size(); ++r) { + out << " Variant " << r << ": " << variants_[r].toString() << "\n"; } - for (uint64_t t = 0; t < transitions.size(); ++t) { + for (uint64_t t = 0; t < transitions_.size(); ++t) { tm timeStruct; tm* result = nullptr; char buffer[25]; if (sizeof(time_t) >= 8) { - time_t val = transitions[t]; + time_t val = transitions_[t]; result = gmtime_r(&val, &timeStruct); if (result) { strftime(buffer, sizeof(buffer), "%F %H:%M:%S", &timeStruct); } } - out << " Transition: " << (result == nullptr ? "null" : buffer) << " (" << transitions[t] - << ") -> " << variants[currentVariant[t]].name << "\n"; + out << " Transition: " << (result == nullptr ? "null" : buffer) << " (" << transitions_[t] + << ") -> " << variants_[currentVariant_[t]].name << "\n"; } } diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index c427a962b59..c7b073c7134 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -29,54 +29,54 @@ namespace orc { // PASS } - TypeImpl::TypeImpl(TypeKind _kind) { - parent = nullptr; - columnId = -1; - maximumColumnId = -1; - kind = _kind; - maxLength = 0; - precision = 0; - scale = 0; - subtypeCount = 0; - } - - TypeImpl::TypeImpl(TypeKind _kind, uint64_t _maxLength) { - parent = nullptr; - columnId = -1; - maximumColumnId = -1; - kind = _kind; - maxLength = _maxLength; - precision = 0; - scale = 0; - subtypeCount = 0; - } - - TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision, uint64_t _scale) { - parent = nullptr; - columnId = -1; - maximumColumnId = -1; - kind = _kind; - maxLength = 0; - precision = _precision; - scale = _scale; - subtypeCount = 0; + TypeImpl::TypeImpl(TypeKind kind) { + parent_ = nullptr; + columnId_ = -1; + maximumColumnId_ = -1; + kind_ = kind; + maxLength_ = 0; + precision_ = 0; + scale_ = 0; + subtypeCount_ = 0; + } + + 
TypeImpl::TypeImpl(TypeKind kind, uint64_t maxLength) { + parent_ = nullptr; + columnId_ = -1; + maximumColumnId_ = -1; + kind_ = kind; + maxLength_ = maxLength; + precision_ = 0; + scale_ = 0; + subtypeCount_ = 0; + } + + TypeImpl::TypeImpl(TypeKind kind, uint64_t precision, uint64_t scale) { + parent_ = nullptr; + columnId_ = -1; + maximumColumnId_ = -1; + kind_ = kind; + maxLength_ = 0; + precision_ = precision; + scale_ = scale; + subtypeCount_ = 0; } uint64_t TypeImpl::assignIds(uint64_t root) const { - columnId = static_cast(root); + columnId_ = static_cast(root); uint64_t current = root + 1; - for (uint64_t i = 0; i < subtypeCount; ++i) { - current = dynamic_cast(subTypes[i].get())->assignIds(current); + for (uint64_t i = 0; i < subtypeCount_; ++i) { + current = dynamic_cast(subTypes_[i].get())->assignIds(current); } - maximumColumnId = static_cast(current) - 1; + maximumColumnId_ = static_cast(current) - 1; return current; } void TypeImpl::ensureIdAssigned() const { - if (columnId == -1) { + if (columnId_ == -1) { const TypeImpl* root = this; - while (root->parent != nullptr) { - root = root->parent; + while (root->parent_ != nullptr) { + root = root->parent_; } root->assignIds(0); } @@ -84,94 +84,94 @@ namespace orc { uint64_t TypeImpl::getColumnId() const { ensureIdAssigned(); - return static_cast(columnId); + return static_cast(columnId_); } uint64_t TypeImpl::getMaximumColumnId() const { ensureIdAssigned(); - return static_cast(maximumColumnId); + return static_cast(maximumColumnId_); } TypeKind TypeImpl::getKind() const { - return kind; + return kind_; } uint64_t TypeImpl::getSubtypeCount() const { - return subtypeCount; + return subtypeCount_; } const Type* TypeImpl::getSubtype(uint64_t i) const { - return subTypes[i].get(); + return subTypes_[i].get(); } const std::string& TypeImpl::getFieldName(uint64_t i) const { - return fieldNames[i]; + return fieldNames_[i]; } uint64_t TypeImpl::getMaximumLength() const { - return maxLength; + return maxLength_; 
} uint64_t TypeImpl::getPrecision() const { - return precision; + return precision_; } uint64_t TypeImpl::getScale() const { - return scale; + return scale_; } Type& TypeImpl::setAttribute(const std::string& key, const std::string& value) { - attributes[key] = value; + attributes_[key] = value; return *this; } bool TypeImpl::hasAttributeKey(const std::string& key) const { - return attributes.find(key) != attributes.end(); + return attributes_.find(key) != attributes_.end(); } Type& TypeImpl::removeAttribute(const std::string& key) { - auto it = attributes.find(key); - if (it == attributes.end()) { + auto it = attributes_.find(key); + if (it == attributes_.end()) { throw std::range_error("Key not found: " + key); } - attributes.erase(it); + attributes_.erase(it); return *this; } std::vector TypeImpl::getAttributeKeys() const { std::vector ret; - ret.reserve(attributes.size()); - for (auto& attribute : attributes) { + ret.reserve(attributes_.size()); + for (auto& attribute : attributes_) { ret.push_back(attribute.first); } return ret; } std::string TypeImpl::getAttributeValue(const std::string& key) const { - auto it = attributes.find(key); - if (it == attributes.end()) { + auto it = attributes_.find(key); + if (it == attributes_.end()) { throw std::range_error("Key not found: " + key); } return it->second; } - void TypeImpl::setIds(uint64_t _columnId, uint64_t _maxColumnId) { - columnId = static_cast(_columnId); - maximumColumnId = static_cast(_maxColumnId); + void TypeImpl::setIds(uint64_t columnId, uint64_t maxColumnId) { + columnId_ = static_cast(columnId); + maximumColumnId_ = static_cast(maxColumnId); } void TypeImpl::addChildType(std::unique_ptr childType) { TypeImpl* child = dynamic_cast(childType.get()); - subTypes.push_back(std::move(childType)); + subTypes_.push_back(std::move(childType)); if (child != nullptr) { - child->parent = this; + child->parent_ = this; } - subtypeCount += 1; + subtypeCount_ += 1; } Type* TypeImpl::addStructField(const std::string& 
fieldName, std::unique_ptr fieldType) { addChildType(std::move(fieldType)); - fieldNames.push_back(fieldName); + fieldNames_.push_back(fieldName); return this; } @@ -190,7 +190,7 @@ namespace orc { } std::string TypeImpl::toString() const { - switch (static_cast(kind)) { + switch (static_cast(kind_)) { case BOOLEAN: return "boolean"; case BYTE: @@ -214,20 +214,20 @@ namespace orc { case TIMESTAMP_INSTANT: return "timestamp with local time zone"; case LIST: - return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">"; + return "array<" + (subTypes_[0] ? subTypes_[0]->toString() : "void") + ">"; case MAP: - return "map<" + (subTypes[0] ? subTypes[0]->toString() : "void") + "," + - (subTypes[1] ? subTypes[1]->toString() : "void") + ">"; + return "map<" + (subTypes_[0] ? subTypes_[0]->toString() : "void") + "," + + (subTypes_[1] ? subTypes_[1]->toString() : "void") + ">"; case STRUCT: { std::string result = "struct<"; - for (size_t i = 0; i < subTypes.size(); ++i) { + for (size_t i = 0; i < subTypes_.size(); ++i) { if (i != 0) { result += ","; } - if (isUnquotedFieldName(fieldNames[i])) { - result += fieldNames[i]; + if (isUnquotedFieldName(fieldNames_[i])) { + result += fieldNames_[i]; } else { - std::string name(fieldNames[i]); + std::string name(fieldNames_[i]); size_t pos = 0; while ((pos = name.find("`", pos)) != std::string::npos) { name.replace(pos, 1, "``"); @@ -238,37 +238,37 @@ namespace orc { result += "`"; } result += ":"; - result += subTypes[i]->toString(); + result += subTypes_[i]->toString(); } result += ">"; return result; } case UNION: { std::string result = "uniontype<"; - for (size_t i = 0; i < subTypes.size(); ++i) { + for (size_t i = 0; i < subTypes_.size(); ++i) { if (i != 0) { result += ","; } - result += subTypes[i]->toString(); + result += subTypes_[i]->toString(); } result += ">"; return result; } case DECIMAL: { std::stringstream result; - result << "decimal(" << precision << "," << scale << ")"; + result << "decimal(" << 
precision_ << "," << scale_ << ")"; return result.str(); } case DATE: return "date"; case VARCHAR: { std::stringstream result; - result << "varchar(" << maxLength << ")"; + result << "varchar(" << maxLength_ << ")"; return result.str(); } case CHAR: { std::stringstream result; - result << "char(" << maxLength << ")"; + result << "char(" << maxLength_ << ")"; return result.str(); } default: @@ -285,7 +285,7 @@ namespace orc { std::unique_ptr TypeImpl::createRowBatch(uint64_t capacity, MemoryPool& memoryPool, bool encoded, bool useTightNumericVector) const { - switch (static_cast(kind)) { + switch (static_cast(kind_)) { case BOOLEAN: if (useTightNumericVector) { return std::make_unique(capacity, memoryPool); diff --git a/c++/src/TypeImpl.hh b/c++/src/TypeImpl.hh index 6d0743793a8..647d5a5d2c5 100644 --- a/c++/src/TypeImpl.hh +++ b/c++/src/TypeImpl.hh @@ -30,17 +30,17 @@ namespace orc { class TypeImpl : public Type { private: - TypeImpl* parent; - mutable int64_t columnId; - mutable int64_t maximumColumnId; - TypeKind kind; - std::vector> subTypes; - std::vector fieldNames; - uint64_t subtypeCount; - uint64_t maxLength; - uint64_t precision; - uint64_t scale; - std::map attributes; + TypeImpl* parent_; + mutable int64_t columnId_; + mutable int64_t maximumColumnId_; + TypeKind kind_; + std::vector> subTypes_; + std::vector fieldNames_; + uint64_t subtypeCount_; + uint64_t maxLength_; + uint64_t precision_; + uint64_t scale_; + std::map attributes_; public: /** diff --git a/c++/src/Utils.hh b/c++/src/Utils.hh index 751c09b2059..4a609788f93 100644 --- a/c++/src/Utils.hh +++ b/c++/src/Utils.hh @@ -25,34 +25,34 @@ namespace orc { class AutoStopwatch { - std::chrono::high_resolution_clock::time_point start; - std::atomic* latencyUs; - std::atomic* count; - bool minus; + std::chrono::high_resolution_clock::time_point start_; + std::atomic* latencyUs_; + std::atomic* count_; + bool minus_; public: - AutoStopwatch(std::atomic* _latencyUs, std::atomic* _count, - bool _minus = 
false) - : latencyUs(_latencyUs), count(_count), minus(_minus) { - if (latencyUs) { - start = std::chrono::high_resolution_clock::now(); + AutoStopwatch(std::atomic* latencyUs, std::atomic* count, + bool minus = false) + : latencyUs_(latencyUs), count_(count), minus_(minus) { + if (latencyUs_) { + start_ = std::chrono::high_resolution_clock::now(); } } ~AutoStopwatch() { - if (latencyUs) { + if (latencyUs_) { std::chrono::microseconds elapsedTime = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start); - if (!minus) { - latencyUs->fetch_add(static_cast(elapsedTime.count())); + std::chrono::high_resolution_clock::now() - start_); + if (!minus_) { + latencyUs_->fetch_add(static_cast(elapsedTime.count())); } else { - latencyUs->fetch_sub(static_cast(elapsedTime.count())); + latencyUs_->fetch_sub(static_cast(elapsedTime.count())); } } - if (count) { - count->fetch_add(1); + if (count_) { + count_->fetch_add(1); } } }; diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc index b9e28545866..28f5216de49 100644 --- a/c++/src/Vector.cc +++ b/c++/src/Vector.cc @@ -66,8 +66,8 @@ namespace orc { // PASS } - EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t _capacity, MemoryPool& pool) - : StringVectorBatch(_capacity, pool), dictionary(), index(pool, _capacity) { + EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t capacityValue, MemoryPool& pool) + : StringVectorBatch(capacityValue, pool), dictionary(), index(pool, capacityValue) { // PASS } @@ -88,10 +88,10 @@ namespace orc { } } - StringVectorBatch::StringVectorBatch(uint64_t _capacity, MemoryPool& pool) - : ColumnVectorBatch(_capacity, pool), - data(pool, _capacity), - length(pool, _capacity), + StringVectorBatch::StringVectorBatch(uint64_t capacityValue, MemoryPool& pool) + : ColumnVectorBatch(capacityValue, pool), + data(pool, capacityValue), + length(pool, capacityValue), blob(pool) { // PASS } @@ -383,7 +383,7 @@ namespace orc { readScales.capacity() * sizeof(int64_t)); } - 
Decimal::Decimal(const Int128& _value, int32_t _scale) : value(_value), scale(_scale) { + Decimal::Decimal(const Int128& decimalValue, int32_t scaleValue) : value(decimalValue), scale(scaleValue) { // PASS } @@ -408,8 +408,8 @@ namespace orc { return value.toDecimalString(scale, trimTrailingZeros); } - TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity, MemoryPool& pool) - : ColumnVectorBatch(_capacity, pool), data(pool, _capacity), nanoseconds(pool, _capacity) { + TimestampVectorBatch::TimestampVectorBatch(uint64_t capacityValue, MemoryPool& pool) + : ColumnVectorBatch(capacityValue, pool), data(pool, capacityValue), nanoseconds(pool, capacityValue) { // PASS } diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc index 89eb3781cfa..04a63037f54 100644 --- a/c++/src/Writer.cc +++ b/c++/src/Writer.cc @@ -71,24 +71,24 @@ namespace orc { }; WriterOptions::WriterOptions() - : privateBits(std::unique_ptr(new WriterOptionsPrivate())) { + : privateBits_(std::unique_ptr(new WriterOptionsPrivate())) { // PASS } WriterOptions::WriterOptions(const WriterOptions& rhs) - : privateBits(std::unique_ptr( - new WriterOptionsPrivate(*(rhs.privateBits.get())))) { + : privateBits_(std::unique_ptr( + new WriterOptionsPrivate(*(rhs.privateBits_.get())))) { // PASS } WriterOptions::WriterOptions(WriterOptions& rhs) { - // swap privateBits with rhs - privateBits.swap(rhs.privateBits); + // swap privateBits_ with rhs + privateBits_.swap(rhs.privateBits_); } WriterOptions& WriterOptions::operator=(const WriterOptions& rhs) { if (this != &rhs) { - privateBits.reset(new WriterOptionsPrivate(*(rhs.privateBits.get()))); + privateBits_.reset(new WriterOptionsPrivate(*(rhs.privateBits_.get()))); } return *this; } @@ -97,7 +97,7 @@ namespace orc { // PASS } RleVersion WriterOptions::getRleVersion() const { - if (privateBits->fileVersion == FileVersion::v_0_11()) { + if (privateBits_->fileVersion == FileVersion::v_0_11()) { return RleVersion_1; } @@ -105,186 +105,186 @@ namespace orc { }
WriterOptions& WriterOptions::setStripeSize(uint64_t size) { - privateBits->stripeSize = size; + privateBits_->stripeSize = size; return *this; } uint64_t WriterOptions::getStripeSize() const { - return privateBits->stripeSize; + return privateBits_->stripeSize; } WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) { if (size >= (1 << 23)) { throw std::invalid_argument("Compression block size cannot be greater or equal than 8M"); } - privateBits->compressionBlockSize = size; + privateBits_->compressionBlockSize = size; return *this; } uint64_t WriterOptions::getCompressionBlockSize() const { - return privateBits->compressionBlockSize; + return privateBits_->compressionBlockSize; } WriterOptions& WriterOptions::setRowIndexStride(uint64_t stride) { - privateBits->rowIndexStride = stride; - privateBits->enableIndex = (stride != 0); + privateBits_->rowIndexStride = stride; + privateBits_->enableIndex = (stride != 0); return *this; } uint64_t WriterOptions::getRowIndexStride() const { - return privateBits->rowIndexStride; + return privateBits_->rowIndexStride; } WriterOptions& WriterOptions::setDictionaryKeySizeThreshold(double val) { - privateBits->dictionaryKeySizeThreshold = val; + privateBits_->dictionaryKeySizeThreshold = val; return *this; } double WriterOptions::getDictionaryKeySizeThreshold() const { - return privateBits->dictionaryKeySizeThreshold; + return privateBits_->dictionaryKeySizeThreshold; } WriterOptions& WriterOptions::setFileVersion(const FileVersion& version) { // Only Hive_0_11 and Hive_0_12 version are supported currently if (version.getMajor() == 0 && (version.getMinor() == 11 || version.getMinor() == 12)) { - privateBits->fileVersion = version; + privateBits_->fileVersion = version; return *this; } if (version == FileVersion::UNSTABLE_PRE_2_0()) { - *privateBits->errorStream << "Warning: ORC files written in " + *privateBits_->errorStream << "Warning: ORC files written in " << FileVersion::UNSTABLE_PRE_2_0().toString() << " 
will not be readable by other versions of the software." << " It is only for developer testing.\n"; - privateBits->fileVersion = version; + privateBits_->fileVersion = version; return *this; } throw std::logic_error("Unsupported file version specified."); } FileVersion WriterOptions::getFileVersion() const { - return privateBits->fileVersion; + return privateBits_->fileVersion; } WriterOptions& WriterOptions::setCompression(CompressionKind comp) { - privateBits->compression = comp; + privateBits_->compression = comp; return *this; } CompressionKind WriterOptions::getCompression() const { - return privateBits->compression; + return privateBits_->compression; } WriterOptions& WriterOptions::setCompressionStrategy(CompressionStrategy strategy) { - privateBits->compressionStrategy = strategy; + privateBits_->compressionStrategy = strategy; return *this; } CompressionStrategy WriterOptions::getCompressionStrategy() const { - return privateBits->compressionStrategy; + return privateBits_->compressionStrategy; } bool WriterOptions::getAlignedBitpacking() const { - return privateBits->compressionStrategy == CompressionStrategy ::CompressionStrategy_SPEED; + return privateBits_->compressionStrategy == CompressionStrategy ::CompressionStrategy_SPEED; } WriterOptions& WriterOptions::setPaddingTolerance(double tolerance) { - privateBits->paddingTolerance = tolerance; + privateBits_->paddingTolerance = tolerance; return *this; } double WriterOptions::getPaddingTolerance() const { - return privateBits->paddingTolerance; + return privateBits_->paddingTolerance; } WriterOptions& WriterOptions::setMemoryPool(MemoryPool* memoryPool) { - privateBits->memoryPool = memoryPool; + privateBits_->memoryPool = memoryPool; return *this; } MemoryPool* WriterOptions::getMemoryPool() const { - return privateBits->memoryPool; + return privateBits_->memoryPool; } WriterOptions& WriterOptions::setErrorStream(std::ostream& errStream) { - privateBits->errorStream = &errStream; + 
privateBits_->errorStream = &errStream; return *this; } std::ostream* WriterOptions::getErrorStream() const { - return privateBits->errorStream; + return privateBits_->errorStream; } bool WriterOptions::getEnableIndex() const { - return privateBits->enableIndex; + return privateBits_->enableIndex; } bool WriterOptions::getEnableDictionary() const { - return privateBits->dictionaryKeySizeThreshold > 0.0; + return privateBits_->dictionaryKeySizeThreshold > 0.0; } WriterOptions& WriterOptions::setColumnsUseBloomFilter(const std::set& columns) { - privateBits->columnsUseBloomFilter = columns; + privateBits_->columnsUseBloomFilter = columns; return *this; } bool WriterOptions::isColumnUseBloomFilter(uint64_t column) const { - return privateBits->columnsUseBloomFilter.find(column) != - privateBits->columnsUseBloomFilter.end(); + return privateBits_->columnsUseBloomFilter.find(column) != + privateBits_->columnsUseBloomFilter.end(); } WriterOptions& WriterOptions::setBloomFilterFPP(double fpp) { - privateBits->bloomFilterFalsePositiveProb = fpp; + privateBits_->bloomFilterFalsePositiveProb = fpp; return *this; } double WriterOptions::getBloomFilterFPP() const { - return privateBits->bloomFilterFalsePositiveProb; + return privateBits_->bloomFilterFalsePositiveProb; } // delibrately not provide setter to write bloom filter version because // we only support UTF8 for now. 
BloomFilterVersion WriterOptions::getBloomFilterVersion() const { - return privateBits->bloomFilterVersion; + return privateBits_->bloomFilterVersion; } const Timezone& WriterOptions::getTimezone() const { - return getTimezoneByName(privateBits->timezone); + return getTimezoneByName(privateBits_->timezone); } const std::string& WriterOptions::getTimezoneName() const { - return privateBits->timezone; + return privateBits_->timezone; } WriterOptions& WriterOptions::setTimezoneName(const std::string& zone) { - privateBits->timezone = zone; + privateBits_->timezone = zone; return *this; } WriterMetrics* WriterOptions::getWriterMetrics() const { - return privateBits->metrics; + return privateBits_->metrics; } WriterOptions& WriterOptions::setWriterMetrics(WriterMetrics* metrics) { - privateBits->metrics = metrics; + privateBits_->metrics = metrics; return *this; } WriterOptions& WriterOptions::setUseTightNumericVector(bool useTightNumericVector) { - privateBits->useTightNumericVector = useTightNumericVector; + privateBits_->useTightNumericVector = useTightNumericVector; return *this; } bool WriterOptions::getUseTightNumericVector() const { - return privateBits->useTightNumericVector; + return privateBits_->useTightNumericVector; } WriterOptions& WriterOptions::setOutputBufferCapacity(uint64_t capacity) { - privateBits->outputBufferCapacity = capacity; + privateBits_->outputBufferCapacity = capacity; return *this; } uint64_t WriterOptions::getOutputBufferCapacity() const { - return privateBits->outputBufferCapacity; + return privateBits_->outputBufferCapacity; } Writer::~Writer() { @@ -293,25 +293,25 @@ namespace orc { class WriterImpl : public Writer { private: - std::unique_ptr columnWriter; - std::unique_ptr compressionStream; - std::unique_ptr bufferedStream; - std::unique_ptr streamsFactory; - OutputStream* outStream; - WriterOptions options; - const Type& type; - uint64_t stripeRows, totalRows, indexRows; - uint64_t currentOffset; - proto::Footer fileFooter; - 
proto::PostScript postScript; - proto::StripeInformation stripeInfo; - proto::Metadata metadata; + std::unique_ptr columnWriter_; + std::unique_ptr compressionStream_; + std::unique_ptr bufferedStream_; + std::unique_ptr streamsFactory_; + OutputStream* outStream_; + WriterOptions options_; + const Type& type_; + uint64_t stripeRows_, totalRows_, indexRows_; + uint64_t currentOffset_; + proto::Footer fileFooter_; + proto::PostScript postScript_; + proto::StripeInformation stripeInfo_; + proto::Metadata metadata_; static const char* magicId; static const WriterId writerId; - bool useTightNumericVector; - int32_t stripesAtLastFlush; - uint64_t lastFlushOffset; + bool useTightNumericVector_; + int32_t stripesAtLastFlush_; + uint64_t lastFlushOffset_; public: WriterImpl(const Type& type, OutputStream* stream, const WriterOptions& options); @@ -342,93 +342,93 @@ namespace orc { const WriterId WriterImpl::writerId = WriterId::ORC_CPP_WRITER; WriterImpl::WriterImpl(const Type& t, OutputStream* stream, const WriterOptions& opts) - : outStream(stream), options(opts), type(t) { - streamsFactory = createStreamsFactory(options, outStream); - columnWriter = buildWriter(type, *streamsFactory, options); - stripeRows = totalRows = indexRows = 0; - currentOffset = 0; - stripesAtLastFlush = 0; - lastFlushOffset = 0; + : outStream_(stream), options_(opts), type_(t) { + streamsFactory_ = createStreamsFactory(options_, outStream_); + columnWriter_ = buildWriter(type_, *streamsFactory_, options_); + stripeRows_ = totalRows_ = indexRows_ = 0; + currentOffset_ = 0; + stripesAtLastFlush_ = 0; + lastFlushOffset_ = 0; - useTightNumericVector = opts.getUseTightNumericVector(); + useTightNumericVector_ = opts.getUseTightNumericVector(); // compression stream for stripe footer, file footer and metadata - compressionStream = - createCompressor(options.getCompression(), outStream, options.getCompressionStrategy(), - options.getOutputBufferCapacity(), options.getCompressionBlockSize(), - 
*options.getMemoryPool(), options.getWriterMetrics()); + compressionStream_ = + createCompressor(options_.getCompression(), outStream_, options_.getCompressionStrategy(), + options_.getOutputBufferCapacity(), options_.getCompressionBlockSize(), + *options_.getMemoryPool(), options_.getWriterMetrics()); // uncompressed stream for post script - bufferedStream.reset(new BufferedOutputStream(*options.getMemoryPool(), outStream, + bufferedStream_.reset(new BufferedOutputStream(*options_.getMemoryPool(), outStream_, 1024, // buffer capacity: 1024 bytes - options.getCompressionBlockSize(), - options.getWriterMetrics())); + options_.getCompressionBlockSize(), + options_.getWriterMetrics())); init(); } std::unique_ptr WriterImpl::createRowBatch(uint64_t size) const { - return type.createRowBatch(size, *options.getMemoryPool(), false, useTightNumericVector); + return type_.createRowBatch(size, *options_.getMemoryPool(), false, useTightNumericVector_); } void WriterImpl::add(ColumnVectorBatch& rowsToAdd) { - if (options.getEnableIndex()) { + if (options_.getEnableIndex()) { uint64_t pos = 0; uint64_t chunkSize = 0; - uint64_t rowIndexStride = options.getRowIndexStride(); + uint64_t rowIndexStride = options_.getRowIndexStride(); while (pos < rowsToAdd.numElements) { - chunkSize = std::min(rowsToAdd.numElements - pos, rowIndexStride - indexRows); - columnWriter->add(rowsToAdd, pos, chunkSize, nullptr); + chunkSize = std::min(rowsToAdd.numElements - pos, rowIndexStride - indexRows_); + columnWriter_->add(rowsToAdd, pos, chunkSize, nullptr); pos += chunkSize; - indexRows += chunkSize; - stripeRows += chunkSize; + indexRows_ += chunkSize; + stripeRows_ += chunkSize; - if (indexRows >= rowIndexStride) { - columnWriter->createRowIndexEntry(); - indexRows = 0; + if (indexRows_ >= rowIndexStride) { + columnWriter_->createRowIndexEntry(); + indexRows_ = 0; } } } else { - stripeRows += rowsToAdd.numElements; - columnWriter->add(rowsToAdd, 0, rowsToAdd.numElements, nullptr); + 
stripeRows_ += rowsToAdd.numElements; + columnWriter_->add(rowsToAdd, 0, rowsToAdd.numElements, nullptr); } - if (columnWriter->getEstimatedSize() >= options.getStripeSize()) { + if (columnWriter_->getEstimatedSize() >= options_.getStripeSize()) { writeStripe(); } } void WriterImpl::close() { - if (stripeRows > 0) { + if (stripeRows_ > 0) { writeStripe(); } writeMetadata(); writeFileFooter(); writePostscript(); - outStream->close(); + outStream_->close(); } uint64_t WriterImpl::writeIntermediateFooter() { - if (stripeRows > 0) { + if (stripeRows_ > 0) { writeStripe(); } - if (stripesAtLastFlush != fileFooter.stripes_size()) { + if (stripesAtLastFlush_ != fileFooter_.stripes_size()) { writeMetadata(); writeFileFooter(); writePostscript(); - stripesAtLastFlush = fileFooter.stripes_size(); - outStream->flush(); - lastFlushOffset = outStream->getLength(); - currentOffset = lastFlushOffset; + stripesAtLastFlush_ = fileFooter_.stripes_size(); + outStream_->flush(); + lastFlushOffset_ = outStream_->getLength(); + currentOffset_ = lastFlushOffset_; // init stripe now that we adjusted the currentOffset initStripe(); } - return lastFlushOffset; + return lastFlushOffset_; } void WriterImpl::addUserMetadata(const std::string& name, const std::string& value) { - proto::UserMetadataItem* userMetadataItem = fileFooter.add_metadata(); + proto::UserMetadataItem* userMetadataItem = fileFooter_.add_metadata(); userMetadataItem->set_name(name); userMetadataItem->set_value(value); } @@ -438,64 +438,64 @@ namespace orc { const static size_t magicIdLength = strlen(WriterImpl::magicId); { - SCOPED_STOPWATCH(options.getWriterMetrics(), IOBlockingLatencyUs, IOCount); + SCOPED_STOPWATCH(options_.getWriterMetrics(), IOBlockingLatencyUs, IOCount); - outStream->write(WriterImpl::magicId, magicIdLength); + outStream_->write(WriterImpl::magicId, magicIdLength); } - currentOffset += magicIdLength; + currentOffset_ += magicIdLength; // Initialize file footer - fileFooter.set_header_length(currentOffset); - fileFooter.set_content_length(0); - fileFooter.set_number_of_rows(0); -
fileFooter.set_row_index_stride(static_cast(options.getRowIndexStride())); - fileFooter.set_writer(writerId); - fileFooter.set_software_version(ORC_VERSION); + fileFooter_.set_header_length(currentOffset_); + fileFooter_.set_content_length(0); + fileFooter_.set_number_of_rows(0); + fileFooter_.set_row_index_stride(static_cast(options_.getRowIndexStride())); + fileFooter_.set_writer(writerId); + fileFooter_.set_software_version(ORC_VERSION); uint32_t index = 0; - buildFooterType(type, fileFooter, index); + buildFooterType(type_, fileFooter_, index); // Initialize post script - postScript.set_footer_length(0); - postScript.set_compression(WriterImpl::convertCompressionKind(options.getCompression())); - postScript.set_compression_block_size(options.getCompressionBlockSize()); + postScript_.set_footer_length(0); + postScript_.set_compression(WriterImpl::convertCompressionKind(options_.getCompression())); + postScript_.set_compression_block_size(options_.getCompressionBlockSize()); - postScript.add_version(options.getFileVersion().getMajor()); - postScript.add_version(options.getFileVersion().getMinor()); + postScript_.add_version(options_.getFileVersion().getMajor()); + postScript_.add_version(options_.getFileVersion().getMinor()); - postScript.set_writer_version(WriterVersion_ORC_135); - postScript.set_magic("ORC"); + postScript_.set_writer_version(WriterVersion_ORC_135); + postScript_.set_magic("ORC"); // Initialize first stripe initStripe(); } void WriterImpl::initStripe() { - stripeInfo.set_offset(currentOffset); - stripeInfo.set_index_length(0); - stripeInfo.set_data_length(0); - stripeInfo.set_footer_length(0); - stripeInfo.set_number_of_rows(0); + stripeInfo_.set_offset(currentOffset_); + stripeInfo_.set_index_length(0); + stripeInfo_.set_data_length(0); + stripeInfo_.set_footer_length(0); + stripeInfo_.set_number_of_rows(0); - stripeRows = indexRows = 0; + stripeRows_ = indexRows_ = 0; } void WriterImpl::writeStripe() { - if (options.getEnableIndex() && 
indexRows != 0) { - columnWriter->createRowIndexEntry(); - indexRows = 0; + if (options_.getEnableIndex() && indexRows_ != 0) { + columnWriter_->createRowIndexEntry(); + indexRows_ = 0; } else { - columnWriter->mergeRowGroupStatsIntoStripeStats(); + columnWriter_->mergeRowGroupStatsIntoStripeStats(); } // dictionary should be written before any stream is flushed - columnWriter->writeDictionary(); + columnWriter_->writeDictionary(); std::vector streams; // write ROW_INDEX streams - if (options.getEnableIndex()) { - columnWriter->writeIndex(streams); + if (options_.getEnableIndex()) { + columnWriter_->writeIndex(streams); } // write streams like PRESENT, DATA, etc. - columnWriter->flush(streams); + columnWriter_->flush(streams); // generate and write stripe footer proto::StripeFooter stripeFooter; @@ -504,28 +504,28 @@ namespace orc { } std::vector encodings; - columnWriter->getColumnEncoding(encodings); + columnWriter_->getColumnEncoding(encodings); for (uint32_t i = 0; i < encodings.size(); ++i) { *stripeFooter.add_columns() = encodings[i]; } - stripeFooter.set_writer_timezone(options.getTimezoneName()); + stripeFooter.set_writer_timezone(options_.getTimezoneName()); // add stripe statistics to metadata - proto::StripeStatistics* stripeStats = metadata.add_stripe_stats(); + proto::StripeStatistics* stripeStats = metadata_.add_stripe_stats(); std::vector colStats; - columnWriter->getStripeStatistics(colStats); + columnWriter_->getStripeStatistics(colStats); for (uint32_t i = 0; i != colStats.size(); ++i) { *stripeStats->add_col_stats() = colStats[i]; } // merge stripe stats into file stats and clear stripe stats - columnWriter->mergeStripeStatsIntoFileStats(); + columnWriter_->mergeStripeStatsIntoFileStats(); - if (!stripeFooter.SerializeToZeroCopyStream(compressionStream.get())) { + if (!stripeFooter.SerializeToZeroCopyStream(compressionStream_.get())) { throw std::logic_error("Failed to write stripe footer."); } - uint64_t footerLength = 
compressionStream->flush(); + uint64_t footerLength = compressionStream_->flush(); // calculate data length and index length uint64_t dataLength = 0; @@ -540,53 +540,53 @@ namespace orc { } // update stripe info - stripeInfo.set_index_length(indexLength); - stripeInfo.set_data_length(dataLength); - stripeInfo.set_footer_length(footerLength); - stripeInfo.set_number_of_rows(stripeRows); + stripeInfo_.set_index_length(indexLength); + stripeInfo_.set_data_length(dataLength); + stripeInfo_.set_footer_length(footerLength); + stripeInfo_.set_number_of_rows(stripeRows_); - *fileFooter.add_stripes() = stripeInfo; + *fileFooter_.add_stripes() = stripeInfo_; - currentOffset = currentOffset + indexLength + dataLength + footerLength; - totalRows += stripeRows; + currentOffset_ = currentOffset_ + indexLength + dataLength + footerLength; + totalRows_ += stripeRows_; - columnWriter->reset(); + columnWriter_->reset(); initStripe(); } void WriterImpl::writeMetadata() { - if (!metadata.SerializeToZeroCopyStream(compressionStream.get())) { + if (!metadata_.SerializeToZeroCopyStream(compressionStream_.get())) { throw std::logic_error("Failed to write metadata."); } - postScript.set_metadata_length(compressionStream.get()->flush()); + postScript_.set_metadata_length(compressionStream_.get()->flush()); } void WriterImpl::writeFileFooter() { - fileFooter.set_content_length(currentOffset - fileFooter.header_length()); - fileFooter.set_number_of_rows(totalRows); + fileFooter_.set_content_length(currentOffset_ - fileFooter_.header_length()); + fileFooter_.set_number_of_rows(totalRows_); // update file statistics std::vector colStats; - columnWriter->getFileStatistics(colStats); - fileFooter.clear_statistics(); + columnWriter_->getFileStatistics(colStats); + fileFooter_.clear_statistics(); for (uint32_t i = 0; i != colStats.size(); ++i) { - *fileFooter.add_statistics() = colStats[i]; + *fileFooter_.add_statistics() = colStats[i]; } - if 
(!fileFooter.SerializeToZeroCopyStream(compressionStream.get())) { + if (!fileFooter_.SerializeToZeroCopyStream(compressionStream_.get())) { throw std::logic_error("Failed to write file footer."); } - postScript.set_footer_length(compressionStream->flush()); + postScript_.set_footer_length(compressionStream_->flush()); } void WriterImpl::writePostscript() { - if (!postScript.SerializeToZeroCopyStream(bufferedStream.get())) { + if (!postScript_.SerializeToZeroCopyStream(bufferedStream_.get())) { throw std::logic_error("Failed to write post script."); } - unsigned char psLength = static_cast(bufferedStream->flush()); + unsigned char psLength = static_cast(bufferedStream_->flush()); - SCOPED_STOPWATCH(options.getWriterMetrics(), IOBlockingLatencyUs, IOCount); - outStream->write(&psLength, sizeof(unsigned char)); + SCOPED_STOPWATCH(options_.getWriterMetrics(), IOBlockingLatencyUs, IOCount); + outStream_->write(&psLength, sizeof(unsigned char)); } void WriterImpl::buildFooterType(const Type& t, proto::Footer& footer, uint32_t& index) { diff --git a/c++/src/io/InputStream.cc b/c++/src/io/InputStream.cc index c0a4d81a0f9..06ef40bd4c7 100644 --- a/c++/src/io/InputStream.cc +++ b/c++/src/io/InputStream.cc @@ -39,17 +39,17 @@ namespace orc { } PositionProvider::PositionProvider(const std::list& posns) { - position = posns.begin(); + position_ = posns.begin(); } uint64_t PositionProvider::next() { - uint64_t result = *position; - ++position; + uint64_t result = *position_; + ++position_; return result; } uint64_t PositionProvider::current() { - return *position; + return *position_; } SeekableInputStream::~SeekableInputStream() { @@ -62,26 +62,26 @@ namespace orc { SeekableArrayInputStream::SeekableArrayInputStream(const unsigned char* values, uint64_t size, uint64_t blkSize) - : data(reinterpret_cast(values)) { - length = size; - position = 0; - blockSize = blkSize == 0 ? length : static_cast(blkSize); + : data_(reinterpret_cast(values)) { + length_ = size; + position_ = 0; + blockSize_ = blkSize == 0 ?
length_ : static_cast(blkSize); } SeekableArrayInputStream::SeekableArrayInputStream(const char* values, uint64_t size, uint64_t blkSize) - : data(values) { - length = size; - position = 0; - blockSize = blkSize == 0 ? length : static_cast(blkSize); + : data_(values) { + length_ = size; + position_ = 0; + blockSize_ = blkSize == 0 ? length_ : static_cast(blkSize); } bool SeekableArrayInputStream::Next(const void** buffer, int* size) { - uint64_t currentSize = std::min(length - position, blockSize); + uint64_t currentSize = std::min(length_ - position_, blockSize_); if (currentSize > 0) { - *buffer = data + position; + *buffer = data_ + position_; *size = static_cast(currentSize); - position += currentSize; + position_ += currentSize; return true; } *size = 0; @@ -91,8 +91,8 @@ namespace orc { void SeekableArrayInputStream::BackUp(int count) { if (count >= 0) { uint64_t unsignedCount = static_cast(count); - if (unsignedCount <= blockSize && unsignedCount <= position) { - position -= unsignedCount; + if (unsignedCount <= blockSize_ && unsignedCount <= position_) { + position_ -= unsignedCount; } else { throw std::logic_error("Can't backup that much!"); } @@ -102,27 +102,27 @@ namespace orc { bool SeekableArrayInputStream::Skip(int count) { if (count >= 0) { uint64_t unsignedCount = static_cast(count); - if (unsignedCount + position <= length) { - position += unsignedCount; + if (unsignedCount + position_ <= length_) { + position_ += unsignedCount; return true; } else { - position = length; + position_ = length_; } } return false; } google::protobuf::int64 SeekableArrayInputStream::ByteCount() const { - return static_cast(position); + return static_cast(position_); } void SeekableArrayInputStream::seek(PositionProvider& seekPosition) { - position = seekPosition.next(); + position_ = seekPosition.next(); } std::string SeekableArrayInputStream::getName() const { std::ostringstream result; - result << "SeekableArrayInputStream " << position << " of " << length; + result 
<< "SeekableArrayInputStream " << position_ << " of " << length_; return result.str(); } @@ -131,16 +131,16 @@ namespace orc { } SeekableFileInputStream::SeekableFileInputStream(InputStream* stream, uint64_t offset, - uint64_t byteCount, MemoryPool& _pool, - uint64_t _blockSize) - : pool(_pool), - input(stream), - start(offset), - length(byteCount), - blockSize(computeBlock(_blockSize, length)) { - position = 0; - buffer.reset(new DataBuffer(pool)); - pushBack = 0; + uint64_t byteCount, MemoryPool& pool, + uint64_t blockSize) + : pool_(pool), + input_(stream), + start_(offset), + length_(byteCount), + blockSize_(computeBlock(blockSize, length_)) { + position_ = 0; + buffer_.reset(new DataBuffer(pool_)); + pushBack_ = 0; } SeekableFileInputStream::~SeekableFileInputStream() { @@ -149,19 +149,19 @@ namespace orc { bool SeekableFileInputStream::Next(const void** data, int* size) { uint64_t bytesRead; - if (pushBack != 0) { - *data = buffer->data() + (buffer->size() - pushBack); - bytesRead = pushBack; + if (pushBack_ != 0) { + *data = buffer_->data() + (buffer_->size() - pushBack_); + bytesRead = pushBack_; } else { - bytesRead = std::min(length - position, blockSize); - buffer->resize(bytesRead); + bytesRead = std::min(length_ - position_, blockSize_); + buffer_->resize(bytesRead); if (bytesRead > 0) { - input->read(buffer->data(), bytesRead, start + position); - *data = static_cast(buffer->data()); + input_->read(buffer_->data(), bytesRead, start_ + position_); + *data = static_cast(buffer_->data()); } } - position += bytesRead; - pushBack = 0; + position_ += bytesRead; + pushBack_ = 0; *size = static_cast(bytesRead); return bytesRead != 0; } @@ -171,14 +171,14 @@ namespace orc { throw std::logic_error("can't backup negative distances"); } uint64_t count = static_cast(signedCount); - if (pushBack > 0) { + if (pushBack_ > 0) { throw std::logic_error("can't backup unless we just called Next"); } - if (count > blockSize || count > position) { + if (count > blockSize_ 
|| count > position_) { throw std::logic_error("can't backup that far"); } - pushBack = static_cast(count); - position -= pushBack; + pushBack_ = static_cast(count); + position_ -= pushBack_; } bool SeekableFileInputStream::Skip(int signedCount) { @@ -186,27 +186,27 @@ namespace orc { return false; } uint64_t count = static_cast(signedCount); - position = std::min(position + count, length); - pushBack = 0; - return position < length; + position_ = std::min(position_ + count, length_); + pushBack_ = 0; + return position_ < length_; } int64_t SeekableFileInputStream::ByteCount() const { - return static_cast(position); + return static_cast(position_); } void SeekableFileInputStream::seek(PositionProvider& location) { - position = location.next(); - if (position > length) { - position = length; + position_ = location.next(); + if (position_ > length_) { + position_ = length_; throw std::logic_error("seek too far"); } - pushBack = 0; + pushBack_ = 0; } std::string SeekableFileInputStream::getName() const { std::ostringstream result; - result << input->getName() << " from " << start << " for " << length; + result << input_->getName() << " from " << start_ << " for " << length_; return result.str(); } diff --git a/c++/src/io/InputStream.hh b/c++/src/io/InputStream.hh index 5e1b4ba0ab0..07aa623b5f7 100644 --- a/c++/src/io/InputStream.hh +++ b/c++/src/io/InputStream.hh @@ -35,7 +35,7 @@ namespace orc { class PositionProvider { private: - std::list::const_iterator position; + std::list::const_iterator position_; public: PositionProvider(const std::list& positions); @@ -60,14 +60,14 @@ namespace orc { */ class SeekableArrayInputStream : public SeekableInputStream { private: - const char* data; - uint64_t length; - uint64_t position; - uint64_t blockSize; + const char* data_; + uint64_t length_; + uint64_t position_; + uint64_t blockSize_; public: - SeekableArrayInputStream(const unsigned char* list, uint64_t length, uint64_t block_size = 0); - SeekableArrayInputStream(const 
char* list, uint64_t length, uint64_t block_size = 0); + SeekableArrayInputStream(const unsigned char* list, uint64_t length, uint64_t blockSize = 0); + SeekableArrayInputStream(const char* list, uint64_t length, uint64_t blockSize = 0); virtual ~SeekableArrayInputStream() override; virtual bool Next(const void** data, int* size) override; virtual void BackUp(int count) override; @@ -82,14 +82,14 @@ namespace orc { */ class SeekableFileInputStream : public SeekableInputStream { private: - MemoryPool& pool; - InputStream* const input; - const uint64_t start; - const uint64_t length; - const uint64_t blockSize; - std::unique_ptr > buffer; - uint64_t position; - uint64_t pushBack; + MemoryPool& pool_; + InputStream* const input_; + const uint64_t start_; + const uint64_t length_; + const uint64_t blockSize_; + std::unique_ptr > buffer_; + uint64_t position_; + uint64_t pushBack_; public: SeekableFileInputStream(InputStream* input, uint64_t offset, uint64_t byteCount, diff --git a/c++/src/io/OutputStream.cc b/c++/src/io/OutputStream.cc index ac5339c6444..6f0d729bef4 100644 --- a/c++/src/io/OutputStream.cc +++ b/c++/src/io/OutputStream.cc @@ -29,11 +29,11 @@ namespace orc { } BufferedOutputStream::BufferedOutputStream(MemoryPool& pool, OutputStream* outStream, - uint64_t capacity_, uint64_t blockSize_, - WriterMetrics* metrics_) - : outputStream(outStream), blockSize(blockSize_), metrics(metrics_) { - dataBuffer.reset(new BlockBuffer(pool, blockSize)); - dataBuffer->reserve(capacity_); + uint64_t capacity, uint64_t blockSize, + WriterMetrics* metrics) + : outputStream_(outStream), blockSize_(blockSize), metrics_(metrics) { + dataBuffer_.reset(new BlockBuffer(pool, blockSize)); + dataBuffer_->reserve(capacity); } BufferedOutputStream::~BufferedOutputStream() { @@ -41,9 +41,9 @@ namespace orc { } bool BufferedOutputStream::Next(void** buffer, int* size) { - auto block = dataBuffer->getNextBlock(); + auto block = dataBuffer_->getNextBlock(); if (block.data == nullptr) { - 
throw std::logic_error("Failed to get next buffer from block buffer."); + throw std::logic_error("Failed to get next buffer from block buffer."); } *buffer = block.data; *size = static_cast(block.size); @@ -53,8 +53,8 @@ namespace orc { void BufferedOutputStream::BackUp(int count) { if (count >= 0) { uint64_t unsignedCount = static_cast(count); - if (unsignedCount <= dataBuffer->size()) { - dataBuffer->resize(dataBuffer->size() - unsignedCount); + if (unsignedCount <= dataBuffer_->size()) { + dataBuffer_->resize(dataBuffer_->size() - unsignedCount); } else { throw std::logic_error("Can't backup that much!"); } @@ -62,7 +62,7 @@ namespace orc { } google::protobuf::int64 BufferedOutputStream::ByteCount() const { - return static_cast(dataBuffer->size()); + return static_cast(dataBuffer_->size()); } bool BufferedOutputStream::WriteAliasedRaw(const void*, int) { @@ -75,67 +75,67 @@ namespace orc { std::string BufferedOutputStream::getName() const { std::ostringstream result; - result << "BufferedOutputStream " << dataBuffer->size() << " of " << dataBuffer->capacity(); + result << "BufferedOutputStream " << dataBuffer_->size() << " of " << dataBuffer_->capacity(); return result.str(); } uint64_t BufferedOutputStream::getSize() const { - return dataBuffer->size(); + return dataBuffer_->size(); } uint64_t BufferedOutputStream::flush() { - uint64_t dataSize = dataBuffer->size(); - // flush data buffer into outputStream + uint64_t dataSize = dataBuffer_->size(); + // flush data buffer into outputStream if (dataSize > 0) { - SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount); - dataBuffer->writeTo(outputStream, metrics); + SCOPED_STOPWATCH(metrics_, IOBlockingLatencyUs, IOCount); + dataBuffer_->writeTo(outputStream_, metrics_); } - dataBuffer->resize(0); + dataBuffer_->resize(0); return dataSize; } void BufferedOutputStream::suppress() { - dataBuffer->resize(0); + dataBuffer_->resize(0); } void AppendOnlyBufferedStream::write(const char* data, size_t size) { size_t dataOffset = 0; while (size > 0) { - if (bufferOffset ==
bufferLength) { - if (!outStream->Next(reinterpret_cast(&buffer), &bufferLength)) { - throw std::logic_error("Failed to allocate buffer."); + if (bufferOffset_ == bufferLength_) { + if (!outStream_->Next(reinterpret_cast(&buffer_), &bufferLength_)) { + throw std::logic_error("Failed to allocate buffer."); } - bufferOffset = 0; + bufferOffset_ = 0; } - size_t len = std::min(static_cast(bufferLength - bufferOffset), size); - memcpy(buffer + bufferOffset, data + dataOffset, len); - bufferOffset += static_cast(len); + size_t len = std::min(static_cast(bufferLength_ - bufferOffset_), size); + memcpy(buffer_ + bufferOffset_, data + dataOffset, len); + bufferOffset_ += static_cast(len); dataOffset += len; size -= len; } } uint64_t AppendOnlyBufferedStream::getSize() const { - return outStream->getSize(); + return outStream_->getSize(); } uint64_t AppendOnlyBufferedStream::flush() { - outStream->BackUp(bufferLength - bufferOffset); - bufferOffset = bufferLength = 0; - buffer = nullptr; - return outStream->flush(); + outStream_->BackUp(bufferLength_ - bufferOffset_); + bufferOffset_ = bufferLength_ = 0; + buffer_ = nullptr; + return outStream_->flush(); } void AppendOnlyBufferedStream::recordPosition(PositionRecorder* recorder) const { - uint64_t flushedSize = outStream->getSize(); - uint64_t unflushedSize = static_cast(bufferOffset); - if (outStream->isCompressed()) { + uint64_t flushedSize = outStream_->getSize(); + uint64_t unflushedSize = static_cast(bufferOffset_); + if (outStream_->isCompressed()) { // start of the compression chunk in the stream recorder->add(flushedSize); // number of decompressed bytes that need to be consumed recorder->add(unflushedSize); } else { - flushedSize -= static_cast(bufferLength); + flushedSize -= static_cast(bufferLength_); // byte offset of the start location recorder->add(flushedSize + unflushedSize); } diff --git a/c++/src/io/OutputStream.hh b/c++/src/io/OutputStream.hh index 146b1bda71e..c63bc805bba 100644 ---
a/c++/src/io/OutputStream.hh +++ b/c++/src/io/OutputStream.hh @@ -49,14 +49,14 @@ namespace orc { */ class BufferedOutputStream : public google::protobuf::io::ZeroCopyOutputStream { private: - OutputStream* outputStream; - std::unique_ptr dataBuffer; - uint64_t blockSize; - WriterMetrics* metrics; + OutputStream* outputStream_; + std::unique_ptr dataBuffer_; + uint64_t blockSize_; + WriterMetrics* metrics_; public: BufferedOutputStream(MemoryPool& pool, OutputStream* outStream, uint64_t capacity, - uint64_t block_size, WriterMetrics* metrics); + uint64_t blockSize, WriterMetrics* metrics); virtual ~BufferedOutputStream() override; virtual bool Next(void** data, int* size) override; @@ -84,15 +84,15 @@ namespace orc { */ class AppendOnlyBufferedStream { private: - std::unique_ptr outStream; - char* buffer; - int bufferOffset, bufferLength; + std::unique_ptr outStream_; + char* buffer_; + int bufferOffset_, bufferLength_; public: - AppendOnlyBufferedStream(std::unique_ptr _outStream) - : outStream(std::move(_outStream)) { - buffer = nullptr; - bufferOffset = bufferLength = 0; + AppendOnlyBufferedStream(std::unique_ptr outStream) + : outStream_(std::move(outStream)) { + buffer_ = nullptr; + bufferOffset_ = bufferLength_ = 0; } void write(const char* data, size_t size); diff --git a/c++/src/sargs/ExpressionTree.cc b/c++/src/sargs/ExpressionTree.cc index 9176c1f6c3b..89de24f160a 100644 --- a/c++/src/sargs/ExpressionTree.cc +++ b/c++/src/sargs/ExpressionTree.cc @@ -24,39 +24,39 @@ namespace orc { ExpressionTree::ExpressionTree(Operator op) - : mOperator(op), mLeaf(UNUSED_LEAF), mConstant(TruthValue::YES_NO_NULL) {} + : mOperator_(op), mLeaf_(UNUSED_LEAF), mConstant_(TruthValue::YES_NO_NULL) {} ExpressionTree::ExpressionTree(Operator op, std::initializer_list children) - : mOperator(op), - mChildren(children.begin(), children.end()), - mLeaf(UNUSED_LEAF), - mConstant(TruthValue::YES_NO_NULL) { + : mOperator_(op), + mChildren_(children.begin(), children.end()), + 
mLeaf_(UNUSED_LEAF), + mConstant_(TruthValue::YES_NO_NULL) { // PASS } ExpressionTree::ExpressionTree(size_t leaf) - : mOperator(Operator::LEAF), mChildren(), mLeaf(leaf), mConstant(TruthValue::YES_NO_NULL) { + : mOperator_(Operator::LEAF), mChildren_(), mLeaf_(leaf), mConstant_(TruthValue::YES_NO_NULL) { // PASS } ExpressionTree::ExpressionTree(TruthValue constant) - : mOperator(Operator::CONSTANT), mChildren(), mLeaf(UNUSED_LEAF), mConstant(constant) { + : mOperator_(Operator::CONSTANT), mChildren_(), mLeaf_(UNUSED_LEAF), mConstant_(constant) { // PASS } ExpressionTree::ExpressionTree(const ExpressionTree& other) - : mOperator(other.mOperator), mLeaf(other.mLeaf), mConstant(other.mConstant) { - for (TreeNode child : other.mChildren) { - mChildren.emplace_back(std::make_shared(*child)); + : mOperator_(other.mOperator_), mLeaf_(other.mLeaf_), mConstant_(other.mConstant_) { + for (TreeNode child : other.mChildren_) { + mChildren_.emplace_back(std::make_shared(*child)); } } ExpressionTree::Operator ExpressionTree::getOperator() const { - return mOperator; + return mOperator_; } const std::vector& ExpressionTree::getChildren() const { - return mChildren; + return mChildren_; } std::vector& ExpressionTree::getChildren() { @@ -65,7 +65,7 @@ namespace orc { } const TreeNode ExpressionTree::getChild(size_t i) const { - return mChildren.at(i); + return mChildren_.at(i); } TreeNode ExpressionTree::getChild(size_t i) { @@ -74,47 +74,47 @@ namespace orc { } TruthValue ExpressionTree::getConstant() const { - assert(mOperator == Operator::CONSTANT); - return mConstant; + assert(mOperator_ == Operator::CONSTANT); + return mConstant_; } size_t ExpressionTree::getLeaf() const { - assert(mOperator == Operator::LEAF); - return mLeaf; + assert(mOperator_ == Operator::LEAF); + return mLeaf_; } void ExpressionTree::setLeaf(size_t leaf) { - assert(mOperator == Operator::LEAF); - mLeaf = leaf; + assert(mOperator_ == Operator::LEAF); + mLeaf_ = leaf; } void 
ExpressionTree::addChild(TreeNode child) { - mChildren.push_back(child); + mChildren_.push_back(child); } TruthValue ExpressionTree::evaluate(const std::vector& leaves) const { TruthValue result; - switch (mOperator) { + switch (mOperator_) { case Operator::OR: { - result = mChildren.at(0)->evaluate(leaves); - for (size_t i = 1; i < mChildren.size() && !isNeeded(result); ++i) { - result = mChildren.at(i)->evaluate(leaves) || result; + result = mChildren_.at(0)->evaluate(leaves); + for (size_t i = 1; i < mChildren_.size() && !isNeeded(result); ++i) { + result = mChildren_.at(i)->evaluate(leaves) || result; } return result; } case Operator::AND: { - result = mChildren.at(0)->evaluate(leaves); - for (size_t i = 1; i < mChildren.size() && isNeeded(result); ++i) { - result = mChildren.at(i)->evaluate(leaves) && result; + result = mChildren_.at(0)->evaluate(leaves); + for (size_t i = 1; i < mChildren_.size() && isNeeded(result); ++i) { + result = mChildren_.at(i)->evaluate(leaves) && result; } return result; } case Operator::NOT: - return !mChildren.at(0)->evaluate(leaves); + return !mChildren_.at(0)->evaluate(leaves); case Operator::LEAF: - return leaves[mLeaf]; + return leaves[mLeaf_]; case Operator::CONSTANT: - return mConstant; + return mConstant_; default: throw std::invalid_argument("Unknown operator!"); } @@ -143,29 +143,29 @@ namespace orc { std::string ExpressionTree::toString() const { std::ostringstream sstream; - switch (mOperator) { + switch (mOperator_) { case Operator::OR: sstream << "(or"; - for (const auto& child : mChildren) { + for (const auto& child : mChildren_) { sstream << ' ' << child->toString(); } sstream << ')'; break; case Operator::AND: sstream << "(and"; - for (const auto& child : mChildren) { + for (const auto& child : mChildren_) { sstream << ' ' << child->toString(); } sstream << ')'; break; case Operator::NOT: - sstream << "(not " << mChildren.at(0)->toString() << ')'; + sstream << "(not " << mChildren_.at(0)->toString() << ')'; break; 
case Operator::LEAF: - sstream << "leaf-" << mLeaf; + sstream << "leaf-" << mLeaf_; break; case Operator::CONSTANT: - sstream << to_string(mConstant); + sstream << to_string(mConstant_); break; default: throw std::invalid_argument("unknown operator!"); diff --git a/c++/src/sargs/ExpressionTree.hh b/c++/src/sargs/ExpressionTree.hh index 3e0b331a2df..39d7567cfd8 100644 --- a/c++/src/sargs/ExpressionTree.hh +++ b/c++/src/sargs/ExpressionTree.hh @@ -74,10 +74,10 @@ namespace orc { TruthValue evaluate(const std::vector& leaves) const; private: - Operator mOperator; - std::vector mChildren; - size_t mLeaf; - TruthValue mConstant; + Operator mOperator_; + std::vector mChildren_; + size_t mLeaf_; + TruthValue mConstant_; }; } // namespace orc diff --git a/c++/src/sargs/Literal.cc b/c++/src/sargs/Literal.cc index c0cdd62201b..ac14f6c74a8 100644 --- a/c++/src/sargs/Literal.cc +++ b/c++/src/sargs/Literal.cc @@ -26,196 +26,196 @@ namespace orc { Literal::Literal(PredicateDataType type) { - mType = type; - mValue.DecimalVal = 0; - mSize = 0; - mIsNull = true; - mPrecision = 0; - mScale = 0; - mHashCode = 0; + mType_ = type; + mValue_.DecimalVal = 0; + mSize_ = 0; + mIsNull_ = true; + mPrecision_ = 0; + mScale_ = 0; + mHashCode_ = 0; } Literal::Literal(int64_t val) { - mType = PredicateDataType::LONG; - mValue.IntVal = val; - mSize = sizeof(val); - mIsNull = false; - mPrecision = 0; - mScale = 0; - mHashCode = hashCode(); + mType_ = PredicateDataType::LONG; + mValue_.IntVal = val; + mSize_ = sizeof(val); + mIsNull_ = false; + mPrecision_ = 0; + mScale_ = 0; + mHashCode_ = hashCode(); } Literal::Literal(double val) { - mType = PredicateDataType::FLOAT; - mValue.DoubleVal = val; - mSize = sizeof(val); - mIsNull = false; - mPrecision = 0; - mScale = 0; - mHashCode = hashCode(); + mType_ = PredicateDataType::FLOAT; + mValue_.DoubleVal = val; + mSize_ = sizeof(val); + mIsNull_ = false; + mPrecision_ = 0; + mScale_ = 0; + mHashCode_ = hashCode(); } Literal::Literal(bool val) { - mType 
= PredicateDataType::BOOLEAN; - mValue.BooleanVal = val; - mSize = sizeof(val); - mIsNull = false; - mPrecision = 0; - mScale = 0; - mHashCode = hashCode(); + mType_ = PredicateDataType::BOOLEAN; + mValue_.BooleanVal = val; + mSize_ = sizeof(val); + mIsNull_ = false; + mPrecision_ = 0; + mScale_ = 0; + mHashCode_ = hashCode(); } Literal::Literal(PredicateDataType type, int64_t val) { if (type != PredicateDataType::DATE) { throw std::invalid_argument("only DATE is supported here!"); } - mType = type; - mValue.IntVal = val; - mSize = sizeof(val); - mIsNull = false; - mPrecision = 0; - mScale = 0; - mHashCode = hashCode(); + mType_ = type; + mValue_.IntVal = val; + mSize_ = sizeof(val); + mIsNull_ = false; + mPrecision_ = 0; + mScale_ = 0; + mHashCode_ = hashCode(); } Literal::Literal(const char* str, size_t size) { - mType = PredicateDataType::STRING; - mValue.Buffer = new char[size]; - memcpy(mValue.Buffer, str, size); - mSize = size; - mIsNull = false; - mPrecision = 0; - mScale = 0; - mHashCode = hashCode(); + mType_ = PredicateDataType::STRING; + mValue_.Buffer = new char[size]; + memcpy(mValue_.Buffer, str, size); + mSize_ = size; + mIsNull_ = false; + mPrecision_ = 0; + mScale_ = 0; + mHashCode_ = hashCode(); } Literal::Literal(Int128 val, int32_t precision, int32_t scale) { - mType = PredicateDataType::DECIMAL; - mValue.DecimalVal = val; - mPrecision = precision; - mScale = scale; - mSize = sizeof(Int128); - mIsNull = false; - mHashCode = hashCode(); + mType_ = PredicateDataType::DECIMAL; + mValue_.DecimalVal = val; + mPrecision_ = precision; + mScale_ = scale; + mSize_ = sizeof(Int128); + mIsNull_ = false; + mHashCode_ = hashCode(); } Literal::Literal(int64_t second, int32_t nanos) { - mType = PredicateDataType::TIMESTAMP; - mValue.TimeStampVal.second = second; - mValue.TimeStampVal.nanos = nanos; - mPrecision = 0; - mScale = 0; - mSize = sizeof(Timestamp); - mIsNull = false; - mHashCode = hashCode(); + mType_ = PredicateDataType::TIMESTAMP; + 
mValue_.TimeStampVal.second = second; + mValue_.TimeStampVal.nanos = nanos; + mPrecision_ = 0; + mScale_ = 0; + mSize_ = sizeof(Timestamp); + mIsNull_ = false; + mHashCode_ = hashCode(); } Literal::Literal(const Literal& r) - : mType(r.mType), mSize(r.mSize), mIsNull(r.mIsNull), mHashCode(r.mHashCode) { - if (mType == PredicateDataType::STRING) { - mValue.Buffer = new char[r.mSize]; - memcpy(mValue.Buffer, r.mValue.Buffer, r.mSize); - mPrecision = 0; - mScale = 0; - } else if (mType == PredicateDataType::DECIMAL) { - mPrecision = r.mPrecision; - mScale = r.mScale; - mValue = r.mValue; - } else if (mType == PredicateDataType::TIMESTAMP) { - mValue.TimeStampVal = r.mValue.TimeStampVal; + : mType_(r.mType_), mSize_(r.mSize_), mIsNull_(r.mIsNull_), mHashCode_(r.mHashCode_) { + if (mType_ == PredicateDataType::STRING) { + mValue_.Buffer = new char[r.mSize_]; + memcpy(mValue_.Buffer, r.mValue_.Buffer, r.mSize_); + mPrecision_ = 0; + mScale_ = 0; + } else if (mType_ == PredicateDataType::DECIMAL) { + mPrecision_ = r.mPrecision_; + mScale_ = r.mScale_; + mValue_ = r.mValue_; + } else if (mType_ == PredicateDataType::TIMESTAMP) { + mValue_.TimeStampVal = r.mValue_.TimeStampVal; } else { - mValue = r.mValue; - mPrecision = 0; - mScale = 0; + mValue_ = r.mValue_; + mPrecision_ = 0; + mScale_ = 0; } } Literal::~Literal() { - if (mType == PredicateDataType::STRING && mValue.Buffer) { - delete[] mValue.Buffer; - mValue.Buffer = nullptr; + if (mType_ == PredicateDataType::STRING && mValue_.Buffer) { + delete[] mValue_.Buffer; + mValue_.Buffer = nullptr; } } Literal& Literal::operator=(const Literal& r) { if (this != &r) { - if (mType == PredicateDataType::STRING && mValue.Buffer) { - delete[] mValue.Buffer; - mValue.Buffer = nullptr; + if (mType_ == PredicateDataType::STRING && mValue_.Buffer) { + delete[] mValue_.Buffer; + mValue_.Buffer = nullptr; } - mType = r.mType; - mSize = r.mSize; - mIsNull = r.mIsNull; - mPrecision = r.mPrecision; - mScale = r.mScale; - if (mType == 
PredicateDataType::STRING) { - mValue.Buffer = new char[r.mSize]; - memcpy(mValue.Buffer, r.mValue.Buffer, r.mSize); - } else if (mType == PredicateDataType::TIMESTAMP) { - mValue.TimeStampVal = r.mValue.TimeStampVal; + mType_ = r.mType_; + mSize_ = r.mSize_; + mIsNull_ = r.mIsNull_; + mPrecision_ = r.mPrecision_; + mScale_ = r.mScale_; + if (mType_ == PredicateDataType::STRING) { + mValue_.Buffer = new char[r.mSize_]; + memcpy(mValue_.Buffer, r.mValue_.Buffer, r.mSize_); + } else if (mType_ == PredicateDataType::TIMESTAMP) { + mValue_.TimeStampVal = r.mValue_.TimeStampVal; } else { - mValue = r.mValue; + mValue_ = r.mValue_; } - mHashCode = r.mHashCode; + mHashCode_ = r.mHashCode_; } return *this; } std::string Literal::toString() const { - if (mIsNull) { + if (mIsNull_) { return "null"; } std::ostringstream sstream; - switch (mType) { + switch (mType_) { case PredicateDataType::LONG: - sstream << mValue.IntVal; + sstream << mValue_.IntVal; break; case PredicateDataType::DATE: - sstream << mValue.DateVal; + sstream << mValue_.DateVal; break; case PredicateDataType::TIMESTAMP: - sstream << mValue.TimeStampVal.second << "." << mValue.TimeStampVal.nanos; + sstream << mValue_.TimeStampVal.second << "." << mValue_.TimeStampVal.nanos; break; case PredicateDataType::FLOAT: - sstream << mValue.DoubleVal; + sstream << mValue_.DoubleVal; break; case PredicateDataType::BOOLEAN: - sstream << (mValue.BooleanVal ? "true" : "false"); + sstream << (mValue_.BooleanVal ? 
"true" : "false"); break; case PredicateDataType::STRING: - sstream << std::string(mValue.Buffer, mSize); + sstream << std::string(mValue_.Buffer, mSize_); break; case PredicateDataType::DECIMAL: - sstream << mValue.DecimalVal.toDecimalString(mScale); + sstream << mValue_.DecimalVal.toDecimalString(mScale_); break; } return sstream.str(); } size_t Literal::hashCode() const { - if (mIsNull) { + if (mIsNull_) { return 0; } - switch (mType) { + switch (mType_) { case PredicateDataType::LONG: - return std::hash{}(mValue.IntVal); + return std::hash{}(mValue_.IntVal); case PredicateDataType::DATE: - return std::hash{}(mValue.DateVal); + return std::hash{}(mValue_.DateVal); case PredicateDataType::TIMESTAMP: - return std::hash{}(mValue.TimeStampVal.second) * 17 + - std::hash{}(mValue.TimeStampVal.nanos); + return std::hash{}(mValue_.TimeStampVal.second) * 17 + + std::hash{}(mValue_.TimeStampVal.nanos); case PredicateDataType::FLOAT: - return std::hash{}(mValue.DoubleVal); + return std::hash{}(mValue_.DoubleVal); case PredicateDataType::BOOLEAN: - return std::hash{}(mValue.BooleanVal); + return std::hash{}(mValue_.BooleanVal); case PredicateDataType::STRING: - return std::hash{}(std::string(mValue.Buffer, mSize)); + return std::hash{}(std::string(mValue_.Buffer, mSize_)); case PredicateDataType::DECIMAL: // current glibc does not support hash - return std::hash{}(mValue.IntVal); + return std::hash{}(mValue_.IntVal); default: return 0; } @@ -225,30 +225,30 @@ namespace orc { if (this == &r) { return true; } - if (mHashCode != r.mHashCode || mType != r.mType || mIsNull != r.mIsNull) { + if (mHashCode_ != r.mHashCode_ || mType_ != r.mType_ || mIsNull_ != r.mIsNull_) { return false; } - if (mIsNull) { + if (mIsNull_) { return true; } - switch (mType) { + switch (mType_) { case PredicateDataType::LONG: - return mValue.IntVal == r.mValue.IntVal; + return mValue_.IntVal == r.mValue_.IntVal; case PredicateDataType::DATE: - return mValue.DateVal == r.mValue.DateVal; + return 
mValue_.DateVal == r.mValue_.DateVal; case PredicateDataType::TIMESTAMP: - return mValue.TimeStampVal == r.mValue.TimeStampVal; + return mValue_.TimeStampVal == r.mValue_.TimeStampVal; case PredicateDataType::FLOAT: - return std::fabs(mValue.DoubleVal - r.mValue.DoubleVal) < + return std::fabs(mValue_.DoubleVal - r.mValue_.DoubleVal) < std::numeric_limits::epsilon(); case PredicateDataType::BOOLEAN: - return mValue.BooleanVal == r.mValue.BooleanVal; + return mValue_.BooleanVal == r.mValue_.BooleanVal; case PredicateDataType::STRING: - return mSize == r.mSize && memcmp(mValue.Buffer, r.mValue.Buffer, mSize) == 0; + return mSize_ == r.mSize_ && memcmp(mValue_.Buffer, r.mValue_.Buffer, mSize_) == 0; case PredicateDataType::DECIMAL: - return mValue.DecimalVal == r.mValue.DecimalVal; + return mValue_.DecimalVal == r.mValue_.DecimalVal; default: return true; } @@ -269,38 +269,38 @@ namespace orc { } int64_t Literal::getLong() const { - validate(mIsNull, mType, PredicateDataType::LONG); - return mValue.IntVal; + validate(mIsNull_, mType_, PredicateDataType::LONG); + return mValue_.IntVal; } int64_t Literal::getDate() const { - validate(mIsNull, mType, PredicateDataType::DATE); - return mValue.DateVal; + validate(mIsNull_, mType_, PredicateDataType::DATE); + return mValue_.DateVal; } Literal::Timestamp Literal::getTimestamp() const { - validate(mIsNull, mType, PredicateDataType::TIMESTAMP); - return mValue.TimeStampVal; + validate(mIsNull_, mType_, PredicateDataType::TIMESTAMP); + return mValue_.TimeStampVal; } double Literal::getFloat() const { - validate(mIsNull, mType, PredicateDataType::FLOAT); - return mValue.DoubleVal; + validate(mIsNull_, mType_, PredicateDataType::FLOAT); + return mValue_.DoubleVal; } std::string Literal::getString() const { - validate(mIsNull, mType, PredicateDataType::STRING); - return std::string(mValue.Buffer, mSize); + validate(mIsNull_, mType_, PredicateDataType::STRING); + return std::string(mValue_.Buffer, mSize_); } bool Literal::getBool() 
const { - validate(mIsNull, mType, PredicateDataType::BOOLEAN); - return mValue.BooleanVal; + validate(mIsNull_, mType_, PredicateDataType::BOOLEAN); + return mValue_.BooleanVal; } Decimal Literal::getDecimal() const { - validate(mIsNull, mType, PredicateDataType::DECIMAL); - return Decimal(mValue.DecimalVal, mScale); + validate(mIsNull_, mType_, PredicateDataType::DECIMAL); + return Decimal(mValue_.DecimalVal, mScale_); } } // namespace orc diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index 525901b1f75..7e5b3c2e5d0 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -30,77 +30,77 @@ namespace orc { PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName, Literal literal) - : mOperator(op), mType(type), mColumnName(colName), mHasColumnName(true), mColumnId(0) { - mLiterals.emplace_back(literal); - mHashCode = hashCode(); + : mOperator_(op), mType_(type), mColumnName_(colName), mHasColumnName_(true), mColumnId_(0) { + mLiterals_.emplace_back(literal); + mHashCode_ = hashCode(); validate(); } PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId, Literal literal) - : mOperator(op), mType(type), mHasColumnName(false), mColumnId(columnId) { - mLiterals.emplace_back(literal); - mHashCode = hashCode(); + : mOperator_(op), mType_(type), mHasColumnName_(false), mColumnId_(columnId) { + mLiterals_.emplace_back(literal); + mHashCode_ = hashCode(); validate(); } PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName, const std::initializer_list& literals) - : mOperator(op), - mType(type), - mColumnName(colName), - mHasColumnName(true), - mLiterals(literals.begin(), literals.end()) { - mHashCode = hashCode(); + : mOperator_(op), + mType_(type), + mColumnName_(colName), + mHasColumnName_(true), + mLiterals_(literals.begin(), literals.end()) { + mHashCode_ = hashCode(); validate(); } 
PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId, const std::initializer_list& literals) - : mOperator(op), - mType(type), - mHasColumnName(false), - mColumnId(columnId), - mLiterals(literals.begin(), literals.end()) { - mHashCode = hashCode(); + : mOperator_(op), + mType_(type), + mHasColumnName_(false), + mColumnId_(columnId), + mLiterals_(literals.begin(), literals.end()) { + mHashCode_ = hashCode(); validate(); } PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, const std::string& colName, const std::vector& literals) - : mOperator(op), - mType(type), - mColumnName(colName), - mHasColumnName(true), - mLiterals(literals.begin(), literals.end()) { - mHashCode = hashCode(); + : mOperator_(op), + mType_(type), + mColumnName_(colName), + mHasColumnName_(true), + mLiterals_(literals.begin(), literals.end()) { + mHashCode_ = hashCode(); validate(); } PredicateLeaf::PredicateLeaf(Operator op, PredicateDataType type, uint64_t columnId, const std::vector& literals) - : mOperator(op), - mType(type), - mHasColumnName(false), - mColumnId(columnId), - mLiterals(literals.begin(), literals.end()) { - mHashCode = hashCode(); + : mOperator_(op), + mType_(type), + mHasColumnName_(false), + mColumnId_(columnId), + mLiterals_(literals.begin(), literals.end()) { + mHashCode_ = hashCode(); validate(); } void PredicateLeaf::validateColumn() const { - if (mHasColumnName && mColumnName.empty()) { + if (mHasColumnName_ && mColumnName_.empty()) { throw std::invalid_argument("column name should not be empty"); - } else if (!mHasColumnName && mColumnId == INVALID_COLUMN_ID) { + } else if (!mHasColumnName_ && mColumnId_ == INVALID_COLUMN_ID) { throw std::invalid_argument("invalid column id"); } } void PredicateLeaf::validate() const { - switch (mOperator) { + switch (mOperator_) { case Operator::IS_NULL: validateColumn(); - if (!mLiterals.empty()) { + if (!mLiterals_.empty()) { throw std::invalid_argument("No literal is required!"); } break; 
@@ -109,28 +109,28 @@ namespace orc { case Operator::LESS_THAN: case Operator::LESS_THAN_EQUALS: validateColumn(); - if (mLiterals.size() != 1) { + if (mLiterals_.size() != 1) { throw std::invalid_argument("One literal is required!"); } - if (static_cast(mLiterals.at(0).getType()) != static_cast(mType)) { + if (static_cast(mLiterals_.at(0).getType()) != static_cast(mType_)) { throw std::invalid_argument("leaf and literal types do not match!"); } break; case Operator::IN: validateColumn(); - if (mLiterals.size() < 2) { + if (mLiterals_.size() < 2) { throw std::invalid_argument("At least two literals are required!"); } - for (auto literal : mLiterals) { - if (static_cast(literal.getType()) != static_cast(mType)) { + for (auto literal : mLiterals_) { + if (static_cast(literal.getType()) != static_cast(mType_)) { throw std::invalid_argument("leaf and literal types do not match!"); } } break; case Operator::BETWEEN: validateColumn(); - for (auto literal : mLiterals) { - if (static_cast(literal.getType()) != static_cast(mType)) { + for (auto literal : mLiterals_) { + if (static_cast(literal.getType()) != static_cast(mType_)) { throw std::invalid_argument("leaf and literal types do not match!"); } } @@ -141,40 +141,40 @@ namespace orc { } PredicateLeaf::Operator PredicateLeaf::getOperator() const { - return mOperator; + return mOperator_; } PredicateDataType PredicateLeaf::getType() const { - return mType; + return mType_; } bool PredicateLeaf::hasColumnName() const { - return mHasColumnName; + return mHasColumnName_; } /** * Get the simple column name. */ const std::string& PredicateLeaf::getColumnName() const { - return mColumnName; + return mColumnName_; } uint64_t PredicateLeaf::getColumnId() const { - return mColumnId; + return mColumnId_; } /** * Get the literal half of the predicate leaf. 
*/ Literal PredicateLeaf::getLiteral() const { - return mLiterals.at(0); + return mLiterals_.at(0); } /** * For operators with multiple literals (IN and BETWEEN), get the literals. */ const std::vector& PredicateLeaf::getLiteralList() const { - return mLiterals; + return mLiterals_; } static std::string getLiteralString(const std::vector& literals) { @@ -195,40 +195,40 @@ namespace orc { } std::string PredicateLeaf::columnDebugString() const { - if (mHasColumnName) return mColumnName; + if (mHasColumnName_) return mColumnName_; std::ostringstream sstream; - sstream << "column(id=" << mColumnId << ')'; + sstream << "column(id=" << mColumnId_ << ')'; return sstream.str(); } std::string PredicateLeaf::toString() const { std::ostringstream sstream; sstream << '('; - switch (mOperator) { + switch (mOperator_) { case Operator::IS_NULL: sstream << columnDebugString() << " is null"; break; case Operator::EQUALS: - sstream << columnDebugString() << " = " << getLiteralString(mLiterals); + sstream << columnDebugString() << " = " << getLiteralString(mLiterals_); break; case Operator::NULL_SAFE_EQUALS: - sstream << columnDebugString() << " null_safe_= " << getLiteralString(mLiterals); + sstream << columnDebugString() << " null_safe_= " << getLiteralString(mLiterals_); break; case Operator::LESS_THAN: - sstream << columnDebugString() << " < " << getLiteralString(mLiterals); + sstream << columnDebugString() << " < " << getLiteralString(mLiterals_); break; case Operator::LESS_THAN_EQUALS: - sstream << columnDebugString() << " <= " << getLiteralString(mLiterals); + sstream << columnDebugString() << " <= " << getLiteralString(mLiterals_); break; case Operator::IN: - sstream << columnDebugString() << " in " << getLiteralsString(mLiterals); + sstream << columnDebugString() << " in " << getLiteralsString(mLiterals_); break; case Operator::BETWEEN: - sstream << columnDebugString() << " between " << getLiteralsString(mLiterals); + sstream << columnDebugString() << " between " << 
getLiteralsString(mLiterals_); break; default: sstream << "unknown operator, column: " << columnDebugString() - << ", literals: " << getLiteralsString(mLiterals); + << ", literals: " << getLiteralsString(mLiterals_); } sstream << ')'; return sstream.str(); @@ -236,25 +236,25 @@ namespace orc { size_t PredicateLeaf::hashCode() const { size_t value = 0; - std::for_each(mLiterals.cbegin(), mLiterals.cend(), + std::for_each(mLiterals_.cbegin(), mLiterals_.cend(), [&](const Literal& literal) { value = value * 17 + literal.getHashCode(); }); auto colHash = - mHasColumnName ? std::hash{}(mColumnName) : std::hash{}(mColumnId); - return value * 103 * 101 * 3 * 17 + std::hash{}(static_cast(mOperator)) + - std::hash{}(static_cast(mType)) * 17 + colHash * 3 * 17; + mHasColumnName_ ? std::hash{}(mColumnName_) : std::hash{}(mColumnId_); + return value * 103 * 101 * 3 * 17 + std::hash{}(static_cast(mOperator_)) + + std::hash{}(static_cast(mType_)) * 17 + colHash * 3 * 17; } bool PredicateLeaf::operator==(const PredicateLeaf& r) const { if (this == &r) { return true; } - if (mHashCode != r.mHashCode || mType != r.mType || mOperator != r.mOperator || - mHasColumnName != r.mHasColumnName || mColumnName != r.mColumnName || - mColumnId != r.mColumnId || mLiterals.size() != r.mLiterals.size()) { + if (mHashCode_ != r.mHashCode_ || mType_ != r.mType_ || mOperator_ != r.mOperator_ || + mHasColumnName_ != r.mHasColumnName_ || mColumnName_ != r.mColumnName_ || + mColumnId_ != r.mColumnId_ || mLiterals_.size() != r.mLiterals_.size()) { return false; } - for (size_t i = 0; i != mLiterals.size(); ++i) { - if (mLiterals[i] != r.mLiterals[i]) { + for (size_t i = 0; i != mLiterals_.size(); ++i) { + if (mLiterals_[i] != r.mLiterals_[i]) { return false; } } @@ -507,12 +507,12 @@ namespace orc { TruthValue PredicateLeaf::evaluatePredicateMinMax(const proto::ColumnStatistics& colStats) const { TruthValue result = TruthValue::YES_NO_NULL; - switch (mType) { + switch (mType_) { case 
PredicateDataType::LONG: { if (colStats.has_int_statistics() && colStats.int_statistics().has_minimum() && colStats.int_statistics().has_maximum()) { const auto& stats = colStats.int_statistics(); - result = evaluatePredicateRange(mOperator, literal2Long(mLiterals), stats.minimum(), + result = evaluatePredicateRange(mOperator_, literal2Long(mLiterals_), stats.minimum(), stats.maximum(), colStats.has_null()); } break; @@ -524,7 +524,7 @@ namespace orc { if (!std::isfinite(stats.sum())) { result = colStats.has_null() ? TruthValue::YES_NO_NULL : TruthValue::YES_NO; } else { - result = evaluatePredicateRange(mOperator, literal2Double(mLiterals), stats.minimum(), + result = evaluatePredicateRange(mOperator_, literal2Double(mLiterals_), stats.minimum(), stats.maximum(), colStats.has_null()); } } @@ -535,7 +535,7 @@ namespace orc { if (colStats.has_string_statistics() && colStats.string_statistics().has_minimum() && colStats.string_statistics().has_maximum()) { const auto& stats = colStats.string_statistics(); - result = evaluatePredicateRange(mOperator, literal2String(mLiterals), stats.minimum(), + result = evaluatePredicateRange(mOperator_, literal2String(mLiterals_), stats.minimum(), stats.maximum(), colStats.has_null()); } break; @@ -544,7 +544,7 @@ namespace orc { if (colStats.has_date_statistics() && colStats.date_statistics().has_minimum() && colStats.date_statistics().has_maximum()) { const auto& stats = colStats.date_statistics(); - result = evaluatePredicateRange(mOperator, literal2Date(mLiterals), stats.minimum(), + result = evaluatePredicateRange(mOperator_, literal2Date(mLiterals_), stats.minimum(), stats.maximum(), colStats.has_null()); } break; @@ -566,7 +566,7 @@ namespace orc { Literal::Timestamp maxTimestamp( stats.maximum_utc() / 1000, static_cast((stats.maximum_utc() % 1000) * 1000000) + maxNano); - result = evaluatePredicateRange(mOperator, literal2Timestamp(mLiterals), minTimestamp, + result = evaluatePredicateRange(mOperator_, 
literal2Timestamp(mLiterals_), minTimestamp, maxTimestamp, colStats.has_null()); } break; @@ -575,7 +575,7 @@ namespace orc { if (colStats.has_decimal_statistics() && colStats.decimal_statistics().has_minimum() && colStats.decimal_statistics().has_maximum()) { const auto& stats = colStats.decimal_statistics(); - result = evaluatePredicateRange(mOperator, literal2Decimal(mLiterals), + result = evaluatePredicateRange(mOperator_, literal2Decimal(mLiterals_), Decimal(stats.minimum()), Decimal(stats.maximum()), colStats.has_null()); } @@ -583,7 +583,7 @@ namespace orc { } case PredicateDataType::BOOLEAN: { if (colStats.has_bucket_statistics()) { - result = evaluateBoolPredicate(mOperator, mLiterals, colStats); + result = evaluateBoolPredicate(mOperator_, mLiterals_, colStats); } break; } @@ -592,8 +592,8 @@ namespace orc { } // make sure null literal is respected for IN operator - if (mOperator == Operator::IN && colStats.has_null()) { - for (const auto& literal : mLiterals) { + if (mOperator_ == Operator::IN && colStats.has_null()) { + for (const auto& literal : mLiterals_) { if (literal.isNull()) { result = TruthValue::YES_NO_NULL; break; @@ -664,18 +664,18 @@ namespace orc { } TruthValue PredicateLeaf::evaluatePredicateBloomFiter(const BloomFilter* bf, bool hasNull) const { - switch (mOperator) { + switch (mOperator_) { case Operator::NULL_SAFE_EQUALS: // null safe equals does not return *_NULL variant. 
// So set hasNull to false - return checkInBloomFilter(mOperator, mType, mLiterals.front(), bf, false); + return checkInBloomFilter(mOperator_, mType_, mLiterals_.front(), bf, false); case Operator::EQUALS: - return checkInBloomFilter(mOperator, mType, mLiterals.front(), bf, hasNull); + return checkInBloomFilter(mOperator_, mType_, mLiterals_.front(), bf, hasNull); case Operator::IN: - for (const auto& literal : mLiterals) { + for (const auto& literal : mLiterals_) { // if at least one value in IN list exist in bloom filter, // qualify the row group/stripe - TruthValue result = checkInBloomFilter(mOperator, mType, literal, bf, hasNull); + TruthValue result = checkInBloomFilter(mOperator_, mType_, literal, bf, hasNull); if (result == TruthValue::YES_NO_NULL || result == TruthValue::YES_NO) { return result; } @@ -695,16 +695,16 @@ namespace orc { const BloomFilter* bloomFilter) const { // files written before ORC-135 stores timestamp wrt to local timezone // causing issues with PPD. disable PPD for timestamp for all old files - if (mType == PredicateDataType::TIMESTAMP) { + if (mType_ == PredicateDataType::TIMESTAMP) { if (writerVersion < WriterVersion::WriterVersion_ORC_135) { return TruthValue::YES_NO_NULL; } } bool allNull = colStats.has_null() && colStats.number_of_values() == 0; - if (mOperator == Operator::IS_NULL || - ((mOperator == Operator::EQUALS || mOperator == Operator::NULL_SAFE_EQUALS) && - mLiterals.at(0).isNull())) { + if (mOperator_ == Operator::IS_NULL || + ((mOperator_ == Operator::EQUALS || mOperator_ == Operator::NULL_SAFE_EQUALS) && + mLiterals_.at(0).isNull())) { // IS_NULL operator does not need to check min/max stats and bloom filter return allNull ? TruthValue::YES : (colStats.has_null() ? 
TruthValue::YES_NO : TruthValue::NO); @@ -714,7 +714,7 @@ namespace orc { } TruthValue result = evaluatePredicateMinMax(colStats); - if (shouldEvaluateBloomFilter(mOperator, result, bloomFilter)) { + if (shouldEvaluateBloomFilter(mOperator_, result, bloomFilter)) { return evaluatePredicateBloomFiter(bloomFilter, colStats.has_null()); } else { return result; diff --git a/c++/src/sargs/PredicateLeaf.hh b/c++/src/sargs/PredicateLeaf.hh index 21ed4561558..a0da609e44c 100644 --- a/c++/src/sargs/PredicateLeaf.hh +++ b/c++/src/sargs/PredicateLeaf.hh @@ -127,7 +127,7 @@ namespace orc { bool operator==(const PredicateLeaf& r) const; size_t getHashCode() const { - return mHashCode; + return mHashCode_; } private: @@ -143,13 +143,13 @@ namespace orc { TruthValue evaluatePredicateBloomFiter(const BloomFilter* bloomFilter, bool hasNull) const; private: - Operator mOperator; - PredicateDataType mType; - std::string mColumnName; - bool mHasColumnName; - uint64_t mColumnId; - std::vector mLiterals; - size_t mHashCode; + Operator mOperator_; + PredicateDataType mType_; + std::string mColumnName_; + bool mHasColumnName_; + uint64_t mColumnId_; + std::vector mLiterals_; + size_t mHashCode_; }; struct PredicateLeafHash { diff --git a/c++/src/sargs/SargsApplier.cc b/c++/src/sargs/SargsApplier.cc index 0e369bf4530..9818190b9d9 100644 --- a/c++/src/sargs/SargsApplier.cc +++ b/c++/src/sargs/SargsApplier.cc @@ -40,24 +40,24 @@ namespace orc { SargsApplier::SargsApplier(const Type& type, const SearchArgument* searchArgument, uint64_t rowIndexStride, WriterVersion writerVersion, ReaderMetrics* metrics, const SchemaEvolution* schemaEvolution) - : mType(type), - mSearchArgument(searchArgument), - mSchemaEvolution(schemaEvolution), - mRowIndexStride(rowIndexStride), - mWriterVersion(writerVersion), - mHasEvaluatedFileStats(false), - mFileStatsEvalResult(true), - mMetrics(metrics) { - const SearchArgumentImpl* sargs = dynamic_cast(mSearchArgument); + : mType_(type), + 
mSearchArgument_(searchArgument), + mSchemaEvolution_(schemaEvolution), + mRowIndexStride_(rowIndexStride), + mWriterVersion_(writerVersion), + mHasEvaluatedFileStats_(false), + mFileStatsEvalResult_(true), + mMetrics_(metrics) { + const SearchArgumentImpl* sargs = dynamic_cast(mSearchArgument_); // find the mapping from predicate leaves to columns const std::vector& leaves = sargs->getLeaves(); - mFilterColumns.resize(leaves.size(), INVALID_COLUMN_ID); - for (size_t i = 0; i != mFilterColumns.size(); ++i) { + mFilterColumns_.resize(leaves.size(), INVALID_COLUMN_ID); + for (size_t i = 0; i != mFilterColumns_.size(); ++i) { if (leaves[i].hasColumnName()) { - mFilterColumns[i] = findColumn(type, leaves[i].getColumnName()); + mFilterColumns_[i] = findColumn(type, leaves[i].getColumnName()); } else { - mFilterColumns[i] = leaves[i].getColumnId(); + mFilterColumns_[i] = leaves[i].getColumnId(); } } } @@ -66,30 +66,30 @@ namespace orc { const std::unordered_map& rowIndexes, const std::map& bloomFilters) { // init state of each row group - uint64_t groupsInStripe = (rowsInStripe + mRowIndexStride - 1) / mRowIndexStride; - mNextSkippedRows.resize(groupsInStripe); - mTotalRowsInStripe = rowsInStripe; + uint64_t groupsInStripe = (rowsInStripe + mRowIndexStride_ - 1) / mRowIndexStride_; + mNextSkippedRows_.resize(groupsInStripe); + mTotalRowsInStripe_ = rowsInStripe; // row indexes do not exist, simply read all rows if (rowIndexes.empty()) { return true; } - const auto& leaves = dynamic_cast(mSearchArgument)->getLeaves(); + const auto& leaves = dynamic_cast(mSearchArgument_)->getLeaves(); std::vector leafValues(leaves.size(), TruthValue::YES_NO_NULL); - mHasSelected = false; - mHasSkipped = false; + mHasSelected_ = false; + mHasSkipped_ = false; uint64_t nextSkippedRowGroup = groupsInStripe; size_t rowGroup = groupsInStripe; do { --rowGroup; for (size_t pred = 0; pred != leaves.size(); ++pred) { - uint64_t columnIdx = mFilterColumns[pred]; + uint64_t columnIdx = 
mFilterColumns_[pred]; auto rowIndexIter = rowIndexes.find(columnIdx); if (columnIdx == INVALID_COLUMN_ID || rowIndexIter == rowIndexes.cend()) { // this column does not exist in current file leafValues[pred] = TruthValue::YES_NO_NULL; - } else if (mSchemaEvolution && !mSchemaEvolution->isSafePPDConversion(columnIdx)) { + } else if (mSchemaEvolution_ && !mSchemaEvolution_->isSafePPDConversion(columnIdx)) { // cannot evaluate predicate when ppd is not safe leafValues[pred] = TruthValue::YES_NO_NULL; } else { @@ -104,37 +104,37 @@ namespace orc { bloomFilter = iter->second.entries.at(rowGroup); } - leafValues[pred] = leaves[pred].evaluate(mWriterVersion, statistics, bloomFilter.get()); + leafValues[pred] = leaves[pred].evaluate(mWriterVersion_, statistics, bloomFilter.get()); } } - bool needed = isNeeded(mSearchArgument->evaluate(leafValues)); + bool needed = isNeeded(mSearchArgument_->evaluate(leafValues)); if (!needed) { - mNextSkippedRows[rowGroup] = 0; + mNextSkippedRows_[rowGroup] = 0; nextSkippedRowGroup = rowGroup; } else { - mNextSkippedRows[rowGroup] = (nextSkippedRowGroup == groupsInStripe) + mNextSkippedRows_[rowGroup] = (nextSkippedRowGroup == groupsInStripe) ? rowsInStripe - : (nextSkippedRowGroup * mRowIndexStride); + : (nextSkippedRowGroup * mRowIndexStride_); } - mHasSelected |= needed; - mHasSkipped |= !needed; + mHasSelected_ |= needed; + mHasSkipped_ |= !needed; } while (rowGroup != 0); // update stats uint64_t selectedRGs = std::accumulate( - mNextSkippedRows.cbegin(), mNextSkippedRows.cend(), 0UL, + mNextSkippedRows_.cbegin(), mNextSkippedRows_.cend(), 0UL, [](uint64_t initVal, uint64_t rg) { return rg > 0 ? 
initVal + 1 : initVal; }); - if (mMetrics != nullptr) { - mMetrics->SelectedRowGroupCount.fetch_add(selectedRGs); - mMetrics->EvaluatedRowGroupCount.fetch_add(groupsInStripe); + if (mMetrics_ != nullptr) { + mMetrics_->SelectedRowGroupCount.fetch_add(selectedRGs); + mMetrics_->EvaluatedRowGroupCount.fetch_add(groupsInStripe); } - return mHasSelected; + return mHasSelected_; } bool SargsApplier::evaluateColumnStatistics(const PbColumnStatistics& colStats) const { - const SearchArgumentImpl* sargs = dynamic_cast(mSearchArgument); + const SearchArgumentImpl* sargs = dynamic_cast(mSearchArgument_); if (sargs == nullptr) { throw InvalidArgument("Failed to cast to SearchArgumentImpl"); } @@ -143,14 +143,14 @@ namespace orc { std::vector leafValues(leaves.size(), TruthValue::YES_NO_NULL); for (size_t pred = 0; pred != leaves.size(); ++pred) { - uint64_t columnId = mFilterColumns[pred]; + uint64_t columnId = mFilterColumns_[pred]; if (columnId != INVALID_COLUMN_ID && colStats.size() > static_cast(columnId)) { - leafValues[pred] = leaves[pred].evaluate(mWriterVersion, + leafValues[pred] = leaves[pred].evaluate(mWriterVersion_, colStats.Get(static_cast(columnId)), nullptr); } } - return isNeeded(mSearchArgument->evaluate(leafValues)); + return isNeeded(mSearchArgument_->evaluate(leafValues)); } bool SargsApplier::evaluateStripeStatistics(const proto::StripeStatistics& stripeStats, @@ -160,29 +160,29 @@ namespace orc { } bool ret = evaluateColumnStatistics(stripeStats.col_stats()); - if (mMetrics != nullptr) { - mMetrics->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount); + if (mMetrics_ != nullptr) { + mMetrics_->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount); } if (!ret) { // reset mNextSkippedRows when the current stripe does not satisfy the PPD - mNextSkippedRows.clear(); + mNextSkippedRows_.clear(); } return ret; } bool SargsApplier::evaluateFileStatistics(const proto::Footer& footer, uint64_t numRowGroupsInStripeRange) { - if (!mHasEvaluatedFileStats) { + if 
(!mHasEvaluatedFileStats_) { if (footer.statistics_size() == 0) { - mFileStatsEvalResult = true; + mFileStatsEvalResult_ = true; } else { - mFileStatsEvalResult = evaluateColumnStatistics(footer.statistics()); - if (mMetrics != nullptr) { - mMetrics->EvaluatedRowGroupCount.fetch_add(numRowGroupsInStripeRange); + mFileStatsEvalResult_ = evaluateColumnStatistics(footer.statistics()); + if (mMetrics_ != nullptr) { + mMetrics_->EvaluatedRowGroupCount.fetch_add(numRowGroupsInStripeRange); } } - mHasEvaluatedFileStats = true; + mHasEvaluatedFileStats_ = true; } - return mFileStatsEvalResult; + return mFileStatsEvalResult_; } } // namespace orc diff --git a/c++/src/sargs/SargsApplier.hh b/c++/src/sargs/SargsApplier.hh index 73703dcf6b8..d48dcfdab34 100644 --- a/c++/src/sargs/SargsApplier.hh +++ b/c++/src/sargs/SargsApplier.hh @@ -75,30 +75,30 @@ namespace orc { * Only valid after invoking pickRowGroups(). */ const std::vector& getNextSkippedRows() const { - return mNextSkippedRows; + return mNextSkippedRows_; } /** * Indicate whether any row group is selected in the last evaluation */ bool hasSelected() const { - return mHasSelected; + return mHasSelected_; } /** * Indicate whether any row group is skipped in the last evaluation */ bool hasSkipped() const { - return mHasSkipped; + return mHasSkipped_; } /** * Whether any row group from current row in the stripe matches PPD. 
*/ bool hasSelectedFrom(uint64_t currentRowInStripe) const { - uint64_t rg = currentRowInStripe / mRowIndexStride; - for (; rg < mNextSkippedRows.size(); ++rg) { - if (mNextSkippedRows[rg]) { + uint64_t rg = currentRowInStripe / mRowIndexStride_; + for (; rg < mNextSkippedRows_.size(); ++rg) { + if (mNextSkippedRows_[rg]) { return true; } } @@ -106,9 +106,9 @@ namespace orc { } std::pair getStats() const { - if (mMetrics != nullptr) { - return std::make_pair(mMetrics->SelectedRowGroupCount.load(), - mMetrics->EvaluatedRowGroupCount.load()); + if (mMetrics_ != nullptr) { + return std::make_pair(mMetrics_->SelectedRowGroupCount.load(), + mMetrics_->EvaluatedRowGroupCount.load()); } else { return {0, 0}; } @@ -125,27 +125,27 @@ namespace orc { static uint64_t findColumn(const Type& type, const std::string& colName); private: - const Type& mType; - const SearchArgument* mSearchArgument; - const SchemaEvolution* mSchemaEvolution; - uint64_t mRowIndexStride; - WriterVersion mWriterVersion; + const Type& mType_; + const SearchArgument* mSearchArgument_; + const SchemaEvolution* mSchemaEvolution_; + uint64_t mRowIndexStride_; + WriterVersion mWriterVersion_; // column ids for each predicate leaf in the search argument - std::vector mFilterColumns; + std::vector mFilterColumns_; // Map from RowGroup index to the next skipped row of the selected range it // locates. If the RowGroup is not selected, set the value to 0. // Calculated in pickRowGroups(). 
- std::vector mNextSkippedRows; - uint64_t mTotalRowsInStripe; - bool mHasSelected; - bool mHasSkipped; + std::vector mNextSkippedRows_; + uint64_t mTotalRowsInStripe_; + bool mHasSelected_; + bool mHasSkipped_; // store result of file stats evaluation - bool mHasEvaluatedFileStats; - bool mFileStatsEvalResult; + bool mHasEvaluatedFileStats_; + bool mFileStatsEvalResult_; // use the SelectedRowGroupCount and EvaluatedRowGroupCount to // keep stats of selected RGs and evaluated RGs - ReaderMetrics* mMetrics; + ReaderMetrics* mMetrics_; }; } // namespace orc diff --git a/c++/src/sargs/SearchArgument.cc b/c++/src/sargs/SearchArgument.cc index 806727f0a03..31fabae29e1 100644 --- a/c++/src/sargs/SearchArgument.cc +++ b/c++/src/sargs/SearchArgument.cc @@ -30,23 +30,23 @@ namespace orc { } const std::vector& SearchArgumentImpl::getLeaves() const { - return mLeaves; + return mLeaves_; } const ExpressionTree* SearchArgumentImpl::getExpression() const { - return mExpressionTree.get(); + return mExpressionTree_.get(); } TruthValue SearchArgumentImpl::evaluate(const std::vector& leaves) const { - return mExpressionTree == nullptr ? TruthValue::YES : mExpressionTree->evaluate(leaves); + return mExpressionTree_ == nullptr ? 
TruthValue::YES : mExpressionTree_->evaluate(leaves); } std::string SearchArgumentImpl::toString() const { std::ostringstream sstream; - for (size_t i = 0; i != mLeaves.size(); ++i) { - sstream << "leaf-" << i << " = " << mLeaves.at(i).toString() << ", "; + for (size_t i = 0; i != mLeaves_.size(); ++i) { + sstream << "leaf-" << i << " = " << mLeaves_.at(i).toString() << ", "; } - sstream << "expr = " << mExpressionTree->toString(); + sstream << "expr = " << mExpressionTree_->toString(); return sstream.str(); } @@ -55,14 +55,14 @@ namespace orc { } SearchArgumentBuilderImpl::SearchArgumentBuilderImpl() { - mRoot.reset(new ExpressionTree(ExpressionTree::Operator::AND)); - mCurrTree.push_back(mRoot); + mRoot_.reset(new ExpressionTree(ExpressionTree::Operator::AND)); + mCurrTree_.push_back(mRoot_); } SearchArgumentBuilder& SearchArgumentBuilderImpl::start(ExpressionTree::Operator op) { TreeNode node = std::make_shared(op); - mCurrTree.front()->addChild(node); - mCurrTree.push_front(node); + mCurrTree_.front()->addChild(node); + mCurrTree_.push_front(node); return *this; } @@ -79,9 +79,9 @@ namespace orc { } SearchArgumentBuilder& SearchArgumentBuilderImpl::end() { - TreeNode& current = mCurrTree.front(); + TreeNode& current = mCurrTree_.front(); if (current->getChildren().empty()) { - throw std::invalid_argument("Cannot create expression " + mRoot->toString() + + throw std::invalid_argument("Cannot create expression " + mRoot_->toString() + " with no children."); } if (current->getOperator() == ExpressionTree::Operator::NOT && @@ -89,13 +89,13 @@ namespace orc { throw std::invalid_argument("Can't create NOT expression " + current->toString() + " with more than 1 child."); } - mCurrTree.pop_front(); + mCurrTree_.pop_front(); return *this; } size_t SearchArgumentBuilderImpl::addLeaf(PredicateLeaf leaf) { - size_t id = mLeaves.size(); - const auto& result = mLeaves.insert(std::make_pair(leaf, id)); + size_t id = mLeaves_.size(); + const auto& result = 
mLeaves_.insert(std::make_pair(leaf, id)); return result.first->second; } @@ -112,7 +112,7 @@ namespace orc { T column, PredicateDataType type, Literal literal) { - TreeNode parent = mCurrTree.front(); + TreeNode parent = mCurrTree_.front(); if (isInvalidColumn(column)) { parent->addChild(std::make_shared(TruthValue::YES_NO_NULL)); } else { @@ -181,7 +181,7 @@ namespace orc { template SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIn(T column, PredicateDataType type, const CONTAINER& literals) { - TreeNode& parent = mCurrTree.front(); + TreeNode& parent = mCurrTree_.front(); if (isInvalidColumn(column)) { parent->addChild(std::make_shared((TruthValue::YES_NO_NULL))); } else { @@ -219,7 +219,7 @@ namespace orc { template SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIsNull(T column, PredicateDataType type) { - TreeNode& parent = mCurrTree.front(); + TreeNode& parent = mCurrTree_.front(); if (isInvalidColumn(column)) { parent->addChild(std::make_shared(TruthValue::YES_NO_NULL)); } else { @@ -244,7 +244,7 @@ namespace orc { PredicateDataType type, Literal lower, Literal upper) { - TreeNode& parent = mCurrTree.front(); + TreeNode& parent = mCurrTree_.front(); if (isInvalidColumn(column)) { parent->addChild(std::make_shared(TruthValue::YES_NO_NULL)); } else { @@ -267,7 +267,7 @@ namespace orc { } SearchArgumentBuilder& SearchArgumentBuilderImpl::literal(TruthValue truth) { - TreeNode& parent = mCurrTree.front(); + TreeNode& parent = mCurrTree_.front(); parent->addChild(std::make_shared(truth)); return *this; } @@ -555,34 +555,34 @@ namespace orc { } SearchArgumentImpl::SearchArgumentImpl(TreeNode root, const std::vector& leaves) - : mExpressionTree(root), mLeaves(leaves) { + : mExpressionTree_(root), mLeaves_(leaves) { // PASS } std::unique_ptr SearchArgumentBuilderImpl::build() { - if (mCurrTree.size() != 1) { - throw std::invalid_argument("Failed to end " + std::to_string(mCurrTree.size()) + + if (mCurrTree_.size() != 1) { + throw 
std::invalid_argument("Failed to end " + std::to_string(mCurrTree_.size()) + " operations."); } - mRoot = pushDownNot(mRoot); - mRoot = foldMaybe(mRoot); - mRoot = flatten(mRoot); - mRoot = convertToCNF(mRoot); - mRoot = flatten(mRoot); - std::vector leafReorder(mLeaves.size(), UNUSED_LEAF); - size_t newLeafCount = compactLeaves(mRoot, 0, leafReorder.data()); - mRoot = rewriteLeaves(mRoot, leafReorder.data()); + mRoot_ = pushDownNot(mRoot_); + mRoot_ = foldMaybe(mRoot_); + mRoot_ = flatten(mRoot_); + mRoot_ = convertToCNF(mRoot_); + mRoot_ = flatten(mRoot_); + std::vector leafReorder(mLeaves_.size(), UNUSED_LEAF); + size_t newLeafCount = compactLeaves(mRoot_, 0, leafReorder.data()); + mRoot_ = rewriteLeaves(mRoot_, leafReorder.data()); std::vector leafList(newLeafCount, PredicateLeaf()); // build the new list - for (auto& leaf : mLeaves) { + for (auto& leaf : mLeaves_) { size_t newLoc = leafReorder[leaf.second]; if (newLoc != UNUSED_LEAF) { leafList[newLoc] = leaf.first; } } - return std::make_unique(mRoot, leafList); + return std::make_unique(mRoot_, leafList); } std::unique_ptr SearchArgumentFactory::newBuilder() { diff --git a/c++/src/sargs/SearchArgument.hh b/c++/src/sargs/SearchArgument.hh index 4b74b287434..095dc24a498 100644 --- a/c++/src/sargs/SearchArgument.hh +++ b/c++/src/sargs/SearchArgument.hh @@ -66,8 +66,8 @@ namespace orc { std::string toString() const override; private: - std::shared_ptr mExpressionTree; - std::vector mLeaves; + std::shared_ptr mExpressionTree_; + std::vector mLeaves_; }; /** @@ -304,9 +304,9 @@ namespace orc { static TreeNode convertToCNF(TreeNode root); private: - std::deque mCurrTree; - std::unordered_map mLeaves; - std::shared_ptr mRoot; + std::deque mCurrTree_; + std::unordered_map mLeaves_; + std::shared_ptr mRoot_; }; } // namespace orc diff --git a/c++/test/MemoryInputStream.hh b/c++/test/MemoryInputStream.hh index 29704e11e40..e6ef55b6de1 100644 --- a/c++/test/MemoryInputStream.hh +++ b/c++/test/MemoryInputStream.hh @@ 
-27,35 +27,35 @@ namespace orc { class MemoryInputStream : public InputStream { public: - MemoryInputStream(const char* _buffer, size_t _size) - : buffer(_buffer), size(_size), naturalReadSize(1024), name("MemoryInputStream") {} + MemoryInputStream(const char* buffer, size_t size) + : buffer_(buffer), size_(size), naturalReadSize_(1024), name_("MemoryInputStream") {} ~MemoryInputStream() override; virtual uint64_t getLength() const override { - return size; + return size_; } virtual uint64_t getNaturalReadSize() const override { - return naturalReadSize; + return naturalReadSize_; } virtual void read(void* buf, uint64_t length, uint64_t offset) override { - memcpy(buf, buffer + offset, length); + memcpy(buf, buffer_ + offset, length); } virtual const std::string& getName() const override { - return name; + return name_; } const char* getData() const { - return buffer; + return buffer_; } private: - const char* buffer; - uint64_t size, naturalReadSize; - std::string name; + const char* buffer_; + uint64_t size_, naturalReadSize_; + std::string name_; }; } // namespace orc diff --git a/c++/test/MemoryOutputStream.cc b/c++/test/MemoryOutputStream.cc index 1d0d55053b7..81045b40948 100644 --- a/c++/test/MemoryOutputStream.cc +++ b/c++/test/MemoryOutputStream.cc @@ -21,11 +21,11 @@ namespace orc { MemoryOutputStream::~MemoryOutputStream() { - delete[] data; + delete[] data_; } void MemoryOutputStream::write(const void* buf, size_t size) { - memcpy(data + length, buf, size); - length += size; + memcpy(data_ + length_, buf, size); + length_ += size; } } // namespace orc diff --git a/c++/test/MemoryOutputStream.hh b/c++/test/MemoryOutputStream.hh index e6b70f59363..3b56868f96c 100644 --- a/c++/test/MemoryOutputStream.hh +++ b/c++/test/MemoryOutputStream.hh @@ -28,30 +28,30 @@ namespace orc { class MemoryOutputStream : public OutputStream { public: - MemoryOutputStream(size_t capacity) : name("MemoryOutputStream") { - data = new char[capacity]; - length = 0; - 
naturalWriteSize = 2048; + MemoryOutputStream(size_t capacity) : name_("MemoryOutputStream") { + data_ = new char[capacity]; + length_ = 0; + naturalWriteSize_ = 2048; } virtual ~MemoryOutputStream() override; virtual uint64_t getLength() const override { - return length; + return length_; } virtual uint64_t getNaturalWriteSize() const override { - return naturalWriteSize; + return naturalWriteSize_; } virtual void write(const void* buf, size_t size) override; virtual const std::string& getName() const override { - return name; + return name_; } const char* getData() const { - return data; + return data_; } void close() override {} @@ -59,13 +59,13 @@ namespace orc { void flush() override {} void reset() { - length = 0; + length_ = 0; } private: - char* data; - std::string name; - uint64_t length, naturalWriteSize; + char* data_; + std::string name_; + uint64_t length_, naturalWriteSize_; }; } // namespace orc diff --git a/c++/test/TestBloomFilter.cc b/c++/test/TestBloomFilter.cc index 0b6cc9ebebe..609995fce97 100644 --- a/c++/test/TestBloomFilter.cc +++ b/c++/test/TestBloomFilter.cc @@ -106,11 +106,11 @@ namespace orc { } #define CheckBitSet(bf, p1, p2, p3, p4, p5) \ - EXPECT_TRUE(bf.mBitSet->get(p1)); \ - EXPECT_TRUE(bf.mBitSet->get(p2)); \ - EXPECT_TRUE(bf.mBitSet->get(p3)); \ - EXPECT_TRUE(bf.mBitSet->get(p4)); \ - EXPECT_TRUE(bf.mBitSet->get(p5)) + EXPECT_TRUE(bf.mBitSet_->get(p1)); \ + EXPECT_TRUE(bf.mBitSet_->get(p2)); \ + EXPECT_TRUE(bf.mBitSet_->get(p3)); \ + EXPECT_TRUE(bf.mBitSet_->get(p4)); \ + EXPECT_TRUE(bf.mBitSet_->get(p5)) // Same test as TestBloomFilter#testBasicOperations() in Java codes. 
We also // verifies the bitSet positions that are set, to make sure both the Java and C++ codes diff --git a/c++/test/TestDecompression.cc b/c++/test/TestDecompression.cc index e729fc8d8ab..dc6caeda0e5 100644 --- a/c++/test/TestDecompression.cc +++ b/c++/test/TestDecompression.cc @@ -601,33 +601,33 @@ namespace orc { #define HEADER_SIZE 3 class CompressBuffer { - std::vector buf; + std::vector buf_; public: - CompressBuffer(size_t capacity) : buf(capacity + HEADER_SIZE) {} + CompressBuffer(size_t capacity) : buf_(capacity + HEADER_SIZE) {} char* getCompressed() { - return buf.data() + HEADER_SIZE; + return buf_.data() + HEADER_SIZE; } char* getBuffer() { - return buf.data(); + return buf_.data(); } void writeHeader(size_t compressedSize) { - buf[0] = static_cast(compressedSize << 1); - buf[1] = static_cast(compressedSize >> 7); - buf[2] = static_cast(compressedSize >> 15); + buf_[0] = static_cast(compressedSize << 1); + buf_[1] = static_cast(compressedSize >> 7); + buf_[2] = static_cast(compressedSize >> 15); } void writeUncompressedHeader(size_t compressedSize) { writeHeader(compressedSize); - buf[0] |= 1; + buf_[0] |= 1; } size_t getCompressedSize() const { - size_t header = static_cast(buf[0]); - header |= static_cast(static_cast(buf[1])) << 8; - header |= static_cast(static_cast(buf[2])) << 16; + size_t header = static_cast(buf_[0]); + header |= static_cast(static_cast(buf_[1])) << 8; + header |= static_cast(static_cast(buf_[2])) << 16; return header >> 1; } diff --git a/run_clang_tidy.py b/run_clang_tidy.py new file mode 100644 index 00000000000..32ecc778ed4 --- /dev/null +++ b/run_clang_tidy.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python +# +#===- run-clang-tidy.py - Parallel clang-tidy runner ---------*- python -*--===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# +# FIXME: Integrate with clang-tidy-diff.py + +""" +Parallel clang-tidy runner +========================== + +Runs clang-tidy over all files in a compilation database. Requires clang-tidy +and clang-apply-replacements in $PATH. + +Example invocations. +- Run clang-tidy on all files in the current working directory with a default + set of checks and show warnings in the cpp files and all project headers. + run-clang-tidy.py $PWD + +- Fix all header guards. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard + +- Fix all header guards included from clang-tidy and header guards + for clang-tidy headers. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ + -header-filter=extra/clang-tidy + +Compilation database setup: +http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html +""" + +from __future__ import print_function + +import argparse +import glob +import json +import multiprocessing +import os +import re +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback + +try: + import yaml +except ImportError: + yaml = None + +is_py2 = sys.version[0] == '2' + +if is_py2: + import Queue as queue +else: + import queue as queue + +def find_compilation_database(path): + """Adjusts the directory until a compilation database is found.""" + result = './' + while not os.path.isfile(os.path.join(result, path)): + if os.path.realpath(result) == '/': + print('Error: could not find compilation database.') + sys.exit(1) + result += '../' + return os.path.realpath(result) + + +def make_absolute(f, directory): + if os.path.isabs(f): + return f + return os.path.normpath(os.path.join(directory, f)) + + +def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, + header_filter, extra_arg, extra_arg_before, quiet, + config): + """Gets a command line for clang-tidy.""" 
+ start = [clang_tidy_binary] + if header_filter is not None: + start.append('-header-filter=' + header_filter) + if checks: + start.append('-checks=' + checks) + if tmpdir is not None: + start.append('-export-fixes') + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. + (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) + os.close(handle) + start.append(name) + for arg in extra_arg: + start.append('-extra-arg=%s' % arg) + for arg in extra_arg_before: + start.append('-extra-arg-before=%s' % arg) + start.append('-p=' + build_path) + if quiet: + start.append('-quiet') + if config: + start.append('-config=' + config) + start.append(f) + return start + + +def merge_replacement_files(tmpdir, mergefile): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey="Diagnostics" + merged=[] + for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): + content = yaml.safe_load(open(replacefile, 'r')) + if not content: + continue # Skip empty files. + merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. + output = { 'MainSourceFile': '', mergekey: merged } + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def check_clang_apply_replacements_binary(args): + """Checks if invoking supplied clang-apply-replacements binary works.""" + try: + subprocess.check_call([args.clang_apply_replacements_binary, '--version']) + except: + print('Unable to run clang-apply-replacements. 
Is clang-apply-replacements ' + 'binary correctly specified?', file=sys.stderr) + traceback.print_exc() + sys.exit(1) + + +def apply_fixes(args, tmpdir): + """Calls clang-apply-fixes on a given directory.""" + invocation = [args.clang_apply_replacements_binary] + if args.format: + invocation.append('-format') + if args.style: + invocation.append('-style=' + args.style) + invocation.append(tmpdir) + subprocess.call(invocation) + + +def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): + """Takes filenames out of queue and runs clang-tidy on them.""" + while True: + name = queue.get() + invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks, + tmpdir, build_path, args.header_filter, + args.extra_arg, args.extra_arg_before, + args.quiet, args.config) + + proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = proc.communicate() + if proc.returncode != 0: + failed_files.append(name) + with lock: + sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode('utf-8')) + queue.task_done() + + +def main(): + parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' + 'in a compilation database. 
Requires ' + 'clang-tidy and clang-apply-replacements in ' + '$PATH.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + default='clang-tidy', + help='path to clang-tidy binary') + parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', + default='clang-apply-replacements', + help='path to clang-apply-replacements binary') + parser.add_argument('-checks', default=None, + help='checks filter, when not specified, use clang-tidy ' + 'default') + parser.add_argument('-config', default=None, + help='Specifies a configuration in YAML/JSON format: ' + ' -config="{Checks: \'*\', ' + ' CheckOptions: [{key: x, ' + ' value: y}]}" ' + 'When the value is empty, clang-tidy will ' + 'attempt to find a file named .clang-tidy for ' + 'each source file in its parent directories.') + parser.add_argument('-header-filter', default=None, + help='regular expression matching the names of the ' + 'headers to output diagnostics from. Diagnostics from ' + 'the main file of each translation unit are always ' + 'displayed.') + if yaml: + parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-j', type=int, default=0, + help='number of tidy instances to be run in parallel.') + parser.add_argument('files', nargs='*', default=['.*'], + help='files to be processed (regex on path)') + parser.add_argument('-fix', action='store_true', help='apply fix-its') + parser.add_argument('-format', action='store_true', help='Reformat code ' + 'after applying fixes') + parser.add_argument('-style', default='file', help='The style of reformat ' + 'code after applying fixes') + parser.add_argument('-p', dest='build_path', + help='Path used to read a compile command database.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command 
line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', + help='Run clang-tidy in quiet mode') + args = parser.parse_args() + + db_path = 'compile_commands.json' + + if args.build_path is not None: + build_path = args.build_path + else: + # Find our database + build_path = find_compilation_database(db_path) + + try: + invocation = [args.clang_tidy_binary, '-list-checks'] + invocation.append('-p=' + build_path) + if args.checks: + invocation.append('-checks=' + args.checks) + invocation.append('-') + if args.quiet: + # Even with -quiet we still want to check if we can call clang-tidy. + with open(os.devnull, 'w') as dev_null: + subprocess.check_call(invocation, stdout=dev_null) + else: + subprocess.check_call(invocation) + except: + print("Unable to run clang-tidy.", file=sys.stderr) + sys.exit(1) + + # Load the database and extract all files. + database = json.load(open(os.path.join(build_path, db_path))) + files = [make_absolute(entry['file'], entry['directory']) + for entry in database] + + max_task = args.j + if max_task == 0: + max_task = multiprocessing.cpu_count() + + tmpdir = None + if args.fix or (yaml and args.export_fixes): + check_clang_apply_replacements_binary(args) + tmpdir = tempfile.mkdtemp() + + # Build up a big regexy filter from all command line arguments. + file_name_re = re.compile('|'.join(args.files)) + + return_code = 0 + try: + # Spin up a bunch of tidy-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread(target=run_tidy, + args=(args, tmpdir, build_path, task_queue, lock, failed_files)) + t.daemon = True + t.start() + + # Fill the queue with files. 
+ for name in files: + if file_name_re.search(name): + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. + print('\nCtrl-C detected, goodbye.') + if tmpdir: + shutil.rmtree(tmpdir) + os.kill(0, 9) + + if yaml and args.export_fixes: + print('Writing fixes to ' + args.export_fixes + ' ...') + try: + merge_replacement_files(tmpdir, args.export_fixes) + except: + print('Error exporting fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if args.fix: + print('Applying fixes ...') + try: + apply_fixes(args, tmpdir) + except: + print('Error applying fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if tmpdir: + shutil.rmtree(tmpdir) + sys.exit(return_code) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc index f0744ad2064..f92cf828aac 100644 --- a/tools/src/FileMemory.cc +++ b/tools/src/FileMemory.cc @@ -25,32 +25,32 @@ class TestMemoryPool : public orc::MemoryPool { private: - std::map blocks; - uint64_t totalMemory; - uint64_t maxMemory; + std::map blocks_; + uint64_t totalMemory_; + uint64_t maxMemory_; public: char* malloc(uint64_t size) override { char* p = static_cast(std::malloc(size)); - blocks[p] = size; - totalMemory += size; - if (maxMemory < totalMemory) { - maxMemory = totalMemory; + blocks_[p] = size; + totalMemory_ += size; + if (maxMemory_ < totalMemory_) { + maxMemory_ = totalMemory_; } return p; } void free(char* p) override { std::free(p); - totalMemory -= blocks[p]; - blocks.erase(p); + totalMemory_ -= blocks_[p]; + blocks_.erase(p); } uint64_t getMaxMemory() { - return maxMemory; + return maxMemory_; } - TestMemoryPool() : totalMemory(0), maxMemory(0) {} + TestMemoryPool() : totalMemory_(0), maxMemory_(0) {} 
~TestMemoryPool() override; }; diff --git a/tools/test/TestFileScan.cc b/tools/test/TestFileScan.cc index ecf6b4f12fc..2e35d86ef02 100644 --- a/tools/test/TestFileScan.cc +++ b/tools/test/TestFileScan.cc @@ -202,13 +202,13 @@ TEST(TestFileScan, testBadCommand) { error); } -void checkForError(const std::string& filename, const std::string& error_msg) { +void checkForError(const std::string& filename, const std::string& errorMsg) { const std::string pgm = findProgram("tools/src/orc-scan"); std::string output; std::string error; EXPECT_EQ(1, runProgram({pgm, filename}, output, error)); EXPECT_EQ("", output); - EXPECT_NE(std::string::npos, error.find(error_msg)) << error; + EXPECT_NE(std::string::npos, error.find(errorMsg)) << error; } TEST(TestFileScan, testErrorHandling) { diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc index 305f936bcdf..5370b47792e 100644 --- a/tools/test/TestMatch.cc +++ b/tools/test/TestMatch.cc @@ -47,24 +47,24 @@ namespace orc { uint64_t rowIndexStride; std::map userMeta; - OrcFileDescription(const std::string& _filename, const std::string& _json, - const std::string& _typeString, const std::string& _version, - const std::string& _softwareVersion, uint64_t _rowCount, - uint64_t _contentLength, uint64_t _stripeCount, CompressionKind _compression, - size_t _compressionSize, uint64_t _rowIndexStride, - const std::map& _meta) - : filename(_filename), - json(_json), - typeString(_typeString), - formatVersion(_version), - softwareVersion(_softwareVersion), - rowCount(_rowCount), - contentLength(_contentLength), - stripeCount(_stripeCount), - compression(_compression), - compressionSize(_compressionSize), - rowIndexStride(_rowIndexStride), - userMeta(_meta) { + OrcFileDescription(const std::string& filename, const std::string& json, + const std::string& typeString, const std::string& version, + const std::string& softwareVersion, uint64_t rowCount, + uint64_t contentLength, uint64_t stripeCount, CompressionKind compression, + size_t 
compressionSize, uint64_t rowIndexStride, + const std::map& meta) + : filename(filename), + json(json), + typeString(typeString), + formatVersion(version), + softwareVersion(softwareVersion), + rowCount(rowCount), + contentLength(contentLength), + stripeCount(stripeCount), + compression(compression), + compressionSize(compressionSize), + rowIndexStride(rowIndexStride), + userMeta(meta) { // PASS } diff --git a/tools/test/gzip.cc b/tools/test/gzip.cc index 4a188158a20..8f076704023 100644 --- a/tools/test/gzip.cc +++ b/tools/test/gzip.cc @@ -28,47 +28,47 @@ namespace orc { - GzipTextReader::GzipTextReader(const std::string& _filename) : filename(_filename) { - file = fopen(filename.c_str(), "rb"); - if (file == nullptr) { - throw std::runtime_error("can't open " + filename); + GzipTextReader::GzipTextReader(const std::string& filename) : filename_(filename) { + file_ = fopen(filename_.c_str(), "rb"); + if (file_ == nullptr) { + throw std::runtime_error("can't open " + filename_); } - stream.zalloc = nullptr; - stream.zfree = nullptr; - stream.opaque = nullptr; - stream.avail_in = 0; - stream.avail_out = 1; - stream.next_in = nullptr; - int ret = inflateInit2(&stream, 16 + MAX_WBITS); + stream_.zalloc = nullptr; + stream_.zfree = nullptr; + stream_.opaque = nullptr; + stream_.avail_in = 0; + stream_.avail_out = 1; + stream_.next_in = nullptr; + int ret = inflateInit2(&stream_, 16 + MAX_WBITS); if (ret != Z_OK) { - throw std::runtime_error("zlib failed initialization for " + filename); + throw std::runtime_error("zlib failed initialization for " + filename_); } - outPtr = nullptr; - outEnd = nullptr; - isDone = false; + outPtr_ = nullptr; + outEnd_ = nullptr; + isDone_ = false; } bool GzipTextReader::nextBuffer() { // if we are done, return - if (isDone) { + if (isDone_) { return false; } // if the last read is done, read more - if (stream.avail_in == 0 && stream.avail_out != 0) { - stream.next_in = input; - stream.avail_in = static_cast(fread(input, 1, sizeof(input), 
file)); - if (ferror(file)) { - throw std::runtime_error("failure reading " + filename); + if (stream_.avail_in == 0 && stream_.avail_out != 0) { + stream_.next_in = input_; + stream_.avail_in = static_cast(fread(input_, 1, sizeof(input_), file_)); + if (ferror(file_)) { + throw std::runtime_error("failure reading " + filename_); } } - stream.avail_out = sizeof(output); - stream.next_out = output; - int ret = inflate(&stream, Z_NO_FLUSH); + stream_.avail_out = sizeof(output_); + stream_.next_out = output_; + int ret = inflate(&stream_, Z_NO_FLUSH); switch (ret) { case Z_OK: break; case Z_STREAM_END: - isDone = true; + isDone_ = true; break; case Z_STREAM_ERROR: throw std::runtime_error("zlib stream problem"); @@ -82,8 +82,8 @@ namespace orc { default: throw std::runtime_error("zlib unknown problem"); } - outPtr = output; - outEnd = output + (sizeof(output) - stream.avail_out); + outPtr_ = output_; + outEnd_ = output_ + (sizeof(output_) - stream_.avail_out); return true; } @@ -91,12 +91,12 @@ namespace orc { bool result = false; line.clear(); while (true) { - if (outPtr == outEnd) { + if (outPtr_ == outEnd_) { if (!nextBuffer()) { return result; } } - unsigned char ch = *(outPtr++); + unsigned char ch = *(outPtr_++); if (ch == '\n') { return true; } @@ -105,9 +105,9 @@ namespace orc { } GzipTextReader::~GzipTextReader() { - inflateEnd(&stream); - if (fclose(file) != 0) { - std::cerr << "can't close file " << filename; + inflateEnd(&stream_); + if (fclose(file_) != 0) { + std::cerr << "can't close file " << filename_; } } } // namespace orc diff --git a/tools/test/gzip.hh b/tools/test/gzip.hh index 69ef5f0e1b4..d21689162d1 100644 --- a/tools/test/gzip.hh +++ b/tools/test/gzip.hh @@ -27,14 +27,14 @@ namespace orc { class GzipTextReader { private: - std::string filename; - FILE* file; - z_stream stream; - unsigned char input[64 * 1024]; - unsigned char output[64 * 1024]; - unsigned char* outPtr; - unsigned char* outEnd; - bool isDone; + std::string filename_; + FILE* 
file_; + z_stream stream_; + unsigned char input_[64 * 1024]; + unsigned char output_[64 * 1024]; + unsigned char* outPtr_; + unsigned char* outEnd_; + bool isDone_; bool nextBuffer();