From 0529cce31a71d4c2f8236994efa94e7817b608aa Mon Sep 17 00:00:00 2001
From: mwish
Date: Sat, 6 Jan 2024 16:46:40 +0800
Subject: [PATCH] DNM: benchmark for std::count

---
Reviewer notes (text between the "---" separator and the diffstat is ignored by git-am):

* column_reader.cc: TypedColumnReaderImpl::ReadLevels is marked `final`, and the
  hand-written loop that counted def_levels entries equal to max_def_level_ is
  replaced by std::count over [def_levels, def_levels + *num_def_levels). The old
  loop is kept as a comment for comparison.
* column_reader.h: ReadLevels is declared on TypedColumnReader, presumably so that
  the benchmark can call it through its Int32Reader*. It is declared pure (`= 0`)
  because this patch provides no definition for it in the base class; a virtual
  function that is neither defined nor pure is ill-formed (no diagnostic required)
  and usually shows up as an undefined reference at link time.
* column_reader_benchmark.cc: adds ColumnReaderReadLevels, which calls ReadLevels
  in a loop until it reports zero definition levels. It is registered for
  repetition arguments 1 and 2 with batch size 1000.
* NOTE(review): template argument lists (e.g. after std::make_unique, static_cast
  and std::vector) look like they were stripped from this copy of the patch, and
  context-line indentation was reconstructed. TODO: confirm against the original
  commit before applying.

 cpp/src/parquet/column_reader.cc           | 14 ++++---
 cpp/src/parquet/column_reader.h            |  3 ++
 cpp/src/parquet/column_reader_benchmark.cc | 43 ++++++++++++++++++++--
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 99978e283b46a..33b491f1d58f7 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1053,7 +1053,7 @@ class TypedColumnReaderImpl : public TypedColumnReader,
   // Read definition and repetition levels. Also return the number of definition levels
   // and number of values to read. This function is called before reading values.
   void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
-                  int64_t* num_def_levels, int64_t* values_to_read) {
+                  int64_t* num_def_levels, int64_t* values_to_read) final {
     batch_size =
         std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
 
@@ -1062,11 +1062,13 @@ class TypedColumnReaderImpl : public TypedColumnReader,
       *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
       // TODO(wesm): this tallying of values-to-decode can be performed with better
       // cache-efficiency if fused with the level decoding.
-      for (int64_t i = 0; i < *num_def_levels; ++i) {
-        if (def_levels[i] == this->max_def_level_) {
-          ++(*values_to_read);
-        }
-      }
+      // for (int64_t i = 0; i < *num_def_levels; ++i) {
+      //   if (def_levels[i] == this->max_def_level_) {
+      //     ++(*values_to_read);
+      //   }
+      // }
+      *values_to_read +=
+          std::count(def_levels, def_levels + *num_def_levels, this->max_def_level_);
     } else {
       // Required field, read all values
       *values_to_read = batch_size;
diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h
index 086f6c0e55806..a8ac7871d4f15 100644
--- a/cpp/src/parquet/column_reader.h
+++ b/cpp/src/parquet/column_reader.h
@@ -300,6 +300,9 @@ class TypedColumnReader : public ColumnReader {
                                           int16_t* rep_levels, int32_t* indices,
                                           int64_t* indices_read, const T** dict,
                                           int32_t* dict_len) = 0;
+
+  virtual void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+                          int64_t* num_def_levels, int64_t* values_to_read) = 0;
 };
 
 namespace internal {
diff --git a/cpp/src/parquet/column_reader_benchmark.cc b/cpp/src/parquet/column_reader_benchmark.cc
index 49b2317ede187..ed7e7508fc6af 100644
--- a/cpp/src/parquet/column_reader_benchmark.cc
+++ b/cpp/src/parquet/column_reader_benchmark.cc
@@ -39,11 +39,14 @@ class BenchmarkHelper {
     NodePtr type = schema::Int32("b", repetition);
 
     if (repetition == Repetition::REQUIRED) {
-      descr_ = std::make_unique(type, 0, 0);
+      descr_ = std::make_unique(type, /*max_definition_level=*/0,
+                                /*max_repetition_level=*/0);
     } else if (repetition == Repetition::OPTIONAL) {
-      descr_ = std::make_unique(type, 1, 0);
+      descr_ = std::make_unique(type, /*max_definition_level=*/1,
+                                /*max_repetition_level=*/0);
     } else {
-      descr_ = std::make_unique(type, 1, 1);
+      descr_ = std::make_unique(type, /*max_definition_level=*/1,
+                                /*max_repetition_level=*/1);
     }
 
     // Vectors filled with random rep/defs and values to make pages.
@@ -141,6 +144,35 @@ static void ColumnReaderReadBatchInt32(::benchmark::State& state) {
   state.SetBytesProcessed(state.iterations() * helper.total_size());
 }
 
+// Benchmarks ReadLevels for ColumnReader with the following parameters in order:
+// - repetition: 0 for REQUIRED, 1 for OPTIONAL, 2 for REPEATED.
+// - batch_size: sets how many levels to read at each call.
+static void ColumnReaderReadLevels(::benchmark::State& state) {
+  const auto repetition = static_cast(state.range(0));
+  const auto batch_size = static_cast(state.range(1));
+
+  BenchmarkHelper helper(repetition, /*num_pages=*/1, /*levels_per_page=*/16 * 80000);
+
+  // Vectors to read the values into.
+  std::vector read_values(batch_size, -1);
+  std::vector read_defs(batch_size, -1);
+  std::vector read_reps(batch_size, -1);
+  for (auto _ : state) {
+    state.PauseTiming();
+    Int32Reader* reader = helper.ResetColumnReader();
+    [[maybe_unused]] bool v = reader->HasNext();
+    state.ResumeTiming();
+    int64_t num_levels = 0;
+    do {
+      int64_t values_read = 0;
+      reader->ReadLevels(batch_size, read_defs.data(), read_reps.data(), &num_levels,
+                         &values_read);
+    } while (num_levels != 0);
+  }
+
+  state.SetBytesProcessed(state.iterations() * helper.total_size());
+}
+
 // Benchmarks ReadRecords for RecordReader with the following parameters in order:
 // - repetition: 0 for REQUIRED, 1 for OPTIONAL, 2 for REPEATED.
 // - batch_size: sets how many values to read at each call.
@@ -204,6 +236,11 @@ BENCHMARK(ColumnReaderReadBatchInt32)
     ->Args({1, 1000})
     ->Args({2, 1000});
 
+BENCHMARK(ColumnReaderReadLevels)
+    ->ArgNames({"Repetition", "BatchSize"})
+    ->Args({1, 1000})
+    ->Args({2, 1000});
+
 BENCHMARK(RecordReaderSkipRecords)
     ->ArgNames({"Repetition", "BatchSize"})
     ->Args({0, 1000})