Skip to content

Commit

Permalink
Add levels decoding basic benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jan 19, 2024
1 parent 55afcf0 commit 51529e0
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 2 deletions.
81 changes: 81 additions & 0 deletions cpp/src/parquet/column_reader_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,5 +219,86 @@ BENCHMARK(RecordReaderReadRecords)
->Args({2, 1000, true})
->Args({2, 1000, false});

// Appends exactly `num_levels` pseudo-random levels in [0, max_level] to
// `input_levels`.
//
// Levels are produced in runs: a value is drawn once and then repeated up to
// `level_repeats` times before the next value is drawn. The final run is
// truncated so that no more than `num_levels` values are appended. The engine
// uses a fixed seed, so repeated calls with the same arguments append the
// same sequence.
//
// \param level_repeats length of each run of identical levels; values <= 0
//        are treated as 1 so the generation loop always makes progress
// \param max_level inclusive upper bound of the generated levels
// \param num_levels number of levels to append; nothing is appended if <= 0
// \param input_levels output vector; existing contents are preserved
void GenerateLevels(int level_repeats, int max_level, int num_levels,
                    std::vector<int16_t>& input_levels) {
  if (num_levels <= 0) {
    return;
  }
  // A non-positive run length would never advance `i` below.
  const int run_length = level_repeats > 0 ? level_repeats : 1;
  input_levels.reserve(input_levels.size() +
                       static_cast<std::vector<int16_t>::size_type>(num_levels));

  std::default_random_engine gen(/*seed=*/1943);
  std::uniform_int_distribution<int16_t> d(0, max_level);
  for (int i = 0; i < num_levels;) {
    // Draw once per run so that the run really consists of identical levels.
    const int16_t level = d(gen);
    for (int j = 0; j < run_length && i < num_levels; ++j, ++i) {
      input_levels.push_back(level);
    }
  }
}

// Encodes the first `num_levels` entries of `input_levels` into `bytes` using
// the given level encoding.
//
// `bytes` is resized to 2 * num_levels bytes before encoding. For
// Encoding::RLE the first sizeof(int32_t) bytes of `bytes` hold the encoded
// length reported by the encoder, and the encoded data follows that prefix;
// for any other encoding the data starts at the beginning of `bytes`.
//
// Fails the enclosing test (via ASSERT_EQ) if the encoder does not consume
// all `num_levels` levels.
void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels,
                  const int16_t* input_levels, std::vector<uint8_t>& bytes) {
  LevelEncoder encoder;
  int levels_count = 0;
  bytes.resize(2 * num_levels);
  ASSERT_EQ(2 * num_levels, static_cast<int>(bytes.size()));
  const int buffer_size = static_cast<int>(bytes.size());
  // encode levels
  if (encoding == Encoding::RLE) {
    // Leave space to write the rle length value. The encoder writes after the
    // prefix, so the capacity handed to it must exclude the prefix as well;
    // otherwise it is told about bytes past the end of the buffer.
    constexpr int kLengthPrefixSize = static_cast<int>(sizeof(int32_t));
    encoder.Init(encoding, max_level, num_levels, bytes.data() + kLengthPrefixSize,
                 buffer_size - kLengthPrefixSize);

    levels_count = encoder.Encode(num_levels, input_levels);
    (reinterpret_cast<int32_t*>(bytes.data()))[0] = encoder.len();
  } else {
    encoder.Init(encoding, max_level, num_levels, bytes.data(), buffer_size);
    levels_count = encoder.Encode(num_levels, input_levels);
  }
  ASSERT_EQ(num_levels, levels_count);
}

// Benchmark body: measures decoding of `num_levels` levels with LevelDecoder.
//
// Setup (not timed): generates levels with GenerateLevels and encodes them
// with EncodeLevels into a byte buffer. Each benchmark iteration then points
// the decoder at that buffer and calls Decode with `batch_size` repeatedly
// until it reports that zero levels were decoded.
//
// \param level_encoding encoding used for both encoding and decoding
// \param max_level maximum level value passed to the generator and the codecs
// \param num_levels number of levels encoded and expected to be decoded
// \param batch_size number of levels requested per Decode call
// \param level_repeat_count run length forwarded to GenerateLevels
// \param state benchmark state driving the timed loop
static void DecodeLevels(Encoding::type level_encoding, int16_t max_level, int num_levels,
                         int batch_size, int level_repeat_count,
                         ::benchmark::State& state) {
  std::vector<uint8_t> bytes;
  {
    // The raw levels are only needed to produce `bytes`; scope them so they
    // are released before the timed loop starts.
    std::vector<int16_t> input_levels;
    GenerateLevels(/*level_repeats=*/level_repeat_count, /*max_level=*/max_level,
                   num_levels, input_levels);
    EncodeLevels(level_encoding, max_level, num_levels, input_levels.data(), bytes);
  }

  LevelDecoder decoder;
  std::vector<int16_t> output_levels(num_levels);
  for (auto _ : state) {
    decoder.SetData(level_encoding, max_level, num_levels, bytes.data(),
                    static_cast<int>(bytes.size()));
    // Decode multiple times with batch_size; every batch is written to the
    // start of `output_levels`, the decoded values themselves are not used.
    while (true) {
      int levels_decoded = decoder.Decode(batch_size, output_levels.data());
      if (levels_decoded == 0) {
        break;
      }
    }
  }
  // Report throughput so runs with different argument sets are comparable.
  const int64_t total_levels = static_cast<int64_t>(state.iterations()) * num_levels;
  state.SetItemsProcessed(total_levels);
  state.SetBytesProcessed(total_levels * static_cast<int64_t>(sizeof(int16_t)));
}

// Benchmark entry point for level decoding with Encoding::RLE.
//
// Unpacks the four registered benchmark arguments, in order: max level,
// number of levels, decode batch size and level repeat count, and forwards
// them to DecodeLevels.
static void ReadLevels(::benchmark::State& state) {
  DecodeLevels(Encoding::RLE,
               /*max_level=*/static_cast<int16_t>(state.range(0)),
               /*num_levels=*/static_cast<int>(state.range(1)),
               /*batch_size=*/static_cast<int>(state.range(2)),
               /*level_repeat_count=*/static_cast<int>(state.range(3)), state);
}

// Registers ReadLevels with a set of argument combinations. Each Args entry is
// {MaxLevel, NumLevels, BatchSize, LevelRepeatCount}, matching the order in
// which ReadLevels reads state.range(0..3).
// NOTE(review): NumLevels is 8096 in every entry, which is not a multiple of
// the batch sizes used here; confirm this is intentional (vs. 8192).
BENCHMARK(ReadLevels)
->ArgNames({"MaxLevel", "NumLevels", "BatchSize", "LevelRepeatCount"})
->Args({1, 8096, 1024, 1})
->Args({1, 8096, 1024, 7})
->Args({1, 8096, 1024, 1024})
->Args({1, 8096, 2048, 1})
->Args({3, 8096, 1024, 1})
->Args({3, 8096, 2048, 1})
->Args({3, 8096, 1024, 7});

} // namespace benchmark
} // namespace parquet
4 changes: 2 additions & 2 deletions cpp/src/parquet/column_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ void EncodeLevels(Encoding::type encoding, int16_t max_level, int num_levels,
}

void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level,
std::vector<int16_t>& input_levels,
const std::vector<int16_t>& input_levels,
std::vector<uint8_t>& bytes) {
LevelDecoder decoder;
int levels_count = 0;
Expand Down Expand Up @@ -1015,7 +1015,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int16_t max_level,
}

void VerifyDecodingMultipleSetData(Encoding::type encoding, int16_t max_level,
std::vector<int16_t>& input_levels,
const std::vector<int16_t>& input_levels,
std::vector<std::vector<uint8_t>>& bytes) {
LevelDecoder decoder;
int levels_count = 0;
Expand Down

0 comments on commit 51529e0

Please sign in to comment.