From 9673514efa52d71daea5f67ce27138895417d5b3 Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 9 May 2024 16:38:50 +0800 Subject: [PATCH] Separate def level and rep level handling --- cpp/src/parquet/column_reader.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index d3f918186247f..ca18882b6173f 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1699,28 +1699,28 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Count logical records and number of non-null values to read ARROW_DCHECK(!at_record_start_); + // Scan repetition levels to find record end + int64_t level = levels_position_; while (levels_position_ < levels_written_) { int64_t stride = std::min(levels_written_ - levels_position_, num_records - records_read); const int64_t position_end = levels_position_ + stride; for (int64_t i = levels_position_; i < position_end; ++i) { records_read += rep_levels[i] == 0; - values_to_read += def_levels[i] == this->max_def_level_; } - levels_position_ = position_end; if (records_read == num_records) { // Last rep_level reaches the boundary ARROW_CHECK_EQ(rep_levels[levels_position_ - 1], 0); // We've found the number of records we were looking for. Set // at_record_start_ to true and break at_record_start_ = true; - // Remove last value if we have reaches the end of the record - levels_position_ = levels_position_ - 1; - values_to_read -= def_levels[levels_position_] == this->max_def_level_; break; } + levels_position_ = position_end; } - *values_seen = values_to_read; + // Scan definition levels to find number of physical values + *values_seen = std::count(def_levels + level, def_levels + levels_position_, + this->max_def_level_); return records_read; }