Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Granular benchmarks of palette transformations. #460

Merged
merged 5 commits into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ path = "benches/unfilter.rs"
name = "unfilter"
harness = false
required-features = ["benchmarks"]

[[bench]]
path = "benches/expand_paletted.rs"
name = "expand_paletted"
harness = false
required-features = ["benchmarks"]
155 changes: 155 additions & 0 deletions benches/expand_paletted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
//! Usage example:
//!
//! ```
//! $ alias bench="rustup run nightly cargo bench"
//! $ bench --bench=expand_paletted --features=benchmarks -- --save-baseline my_baseline
//! ... tweak something ...
//! $ bench --bench=expand_paletted --features=benchmarks -- --baseline my_baseline
//! ```

use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use png::benchable_apis::{create_info_from_plte_trns_bitdepth, create_transform_fn, TransformFn};
use png::{Info, Transformations};
use rand::Rng;
use std::fmt::{self, Display};

/// Selects whether a benchmark input carries transparency (`trns`) data.
#[derive(Clone, Copy)]
enum TrnsPresence {
    /// Transparency data is generated for the input.
    Present,
    /// The input has no transparency data.
    Absent,
}

/// Renders the variant as the `trns=yes` / `trns=no` label used in benchmark names.
impl Display for TrnsPresence {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            TrnsPresence::Absent => "trns=no",
            TrnsPresence::Present => "trns=yes",
        };
        f.write_str(label)
    }
}

/// Runs every palette-expansion benchmark in this file.
///
/// First the "exec" benchmarks are run for each combination of transparency presence and
/// source bit depth, then the "ctor" benchmarks for a few palette / transparency sizes.
fn expand_paletted_all(c: &mut Criterion) {
    // The input size is derived from a typical L1 cache size, split into shares for
    // the input itself, the RGBA output and other data.
    const TYPICAL_L1_CACHE_SIZE: usize = 32 * 1024;
    const INPUT_SHARE: usize = 1;
    const RGBA_OUTPUT_SHARE: usize = 4;
    const OTHER_DATA_SHARE: usize = 1;
    let input_size = TYPICAL_L1_CACHE_SIZE / (INPUT_SHARE + RGBA_OUTPUT_SHARE + OTHER_DATA_SHARE);

    for &trns in &[TrnsPresence::Absent, TrnsPresence::Present] {
        for &bit_depth in &[4u8, 8] {
            bench_expand_palette(c, trns, bit_depth, input_size);
        }
    }

    // (plte_size, trns_size) pairs, in the order they are benchmarked:
    //   256 / 256 - Full PLTE and trNS
    //   224 /  32 - Partial PLTE and trNS
    //    16 /   1 - Guess: typical for small images?
    for &(plte_size, trns_size) in &[(256usize, 256usize), (224, 32), (16, 1)] {
        bench_create_fn(c, plte_size, trns_size);
    }
}

// Collect `expand_paletted_all` into the `benches` group and hand that group to
// Criterion's entry-point macro.
criterion_group!(benches, expand_paletted_all);
criterion_main!(benches);

/// Returns a freshly allocated vector of `n` bytes filled from `rng`.
///
/// Panics if filling the buffer from `rng` reports an error.
fn get_random_bytes<R: Rng>(rng: &mut R, n: usize) -> Vec<u8> {
    use rand::Fill;

    let mut buffer = vec![0u8; n];
    Fill::try_fill(buffer.as_mut_slice(), rng).unwrap();
    buffer
}

/// Randomly generated data for one palette-expansion benchmark run.
struct Input {
/// Raw palette bytes, handed to `create_info_from_plte_trns_bitdepth` as the PLTE data.
palette: Vec<u8>,
/// Optional transparency bytes; `None` when the benchmark runs without `trns`.
trns: Option<Vec<u8>>,
/// Source bytes that the transform function reads.
src: Vec<u8>,
/// Bit depth of the samples packed into `src`.
src_bit_depth: u8,
}

impl Input {
fn new(trns: TrnsPresence, src_bit_depth: u8, input_size_in_bytes: usize) -> Self {
let mut rng = rand::thread_rng();

// We provide RGB entries for 192 out of 256 possible indices and Alpha/Transparency
// entries for 32 out of 256 possible indices. Rationale for these numbers:
// * Oftentimes only a handful of colors at the edges of an icon need transparency
// * In general, code needs to handle out-of-bounds indices, so it seems desirable
// to explicitly test this.
let palette = get_random_bytes(&mut rng, 192.min(input_size_in_bytes) * 3);
let trns = match trns {
TrnsPresence::Absent => None,
TrnsPresence::Present => Some(get_random_bytes(&mut rng, 32.min(input_size_in_bytes))),
};
let src = get_random_bytes(&mut rng, input_size_in_bytes);

Self {
palette,
trns,
src,
src_bit_depth,
}
}

fn output_size_in_bytes(&self) -> usize {
let output_bytes_per_input_sample = match self.trns {
None => 3,
Some(_) => 4,
};
let samples_count_per_byte = (8 / self.src_bit_depth) as usize;
let samples_count = self.src.len() * samples_count_per_byte;
samples_count * output_bytes_per_input_sample
}

fn to_info(&self) -> Info {
create_info_from_plte_trns_bitdepth(&self.palette, self.trns.as_deref(), self.src_bit_depth)
}
}

/// Creates the transform function for `info` with `Transformations::EXPAND` requested.
///
/// Panics if `create_transform_fn` returns an error.
#[inline(always)]
fn create_expand_palette_fn(info: &Info) -> TransformFn {
    let transformations = Transformations::EXPAND;
    let transform_fn = create_transform_fn(info, transformations);
    transform_fn.unwrap()
}

/// Benchmarks how long it takes to construct the palette-expanding transform function.
///
/// `plte_size` is the number of palette entries and `trns_size` is the number of
/// transparency entries; both are filled with random bytes. The benchmark id is
/// `plte={plte_size}/trns={trns_size}` in the `expand_paletted(ctor)` group.
fn bench_create_fn(c: &mut Criterion, plte_size: usize, trns_size: usize) {
    let mut group = c.benchmark_group("expand_paletted(ctor)");
    group.sample_size(10000);

    let mut rng = rand::thread_rng();
    // Every palette entry is an RGB triple, so `plte_size` entries take `3 * plte_size`
    // bytes. Transparency entries are one byte each.
    let plte = get_random_bytes(&mut rng, 3 * plte_size);
    let trns = get_random_bytes(&mut rng, trns_size);
    let info = create_info_from_plte_trns_bitdepth(&plte, Some(&trns), 8);
    group.bench_with_input(
        format!("plte={plte_size}/trns={trns_size:?}"),
        &info,
        |b, info| {
            b.iter(|| create_expand_palette_fn(info));
        },
    );
}

/// Benchmarks running the palette-expanding transform function over random input.
///
/// Throughput is reported in output bytes. The benchmark id is
/// `{trns}/src_bits={src_bit_depth}/src_size={input_size_in_bytes}` in the
/// `expand_paletted(exec)` group.
fn bench_expand_palette(
    c: &mut Criterion,
    trns: TrnsPresence,
    src_bit_depth: u8,
    input_size_in_bytes: usize,
) {
    let mut group = c.benchmark_group("expand_paletted(exec)");

    let input = Input::new(trns, src_bit_depth, input_size_in_bytes);
    let transform_fn = create_expand_palette_fn(&input.to_info());
    let output_len = input.output_size_in_bytes();
    let bench_id = format!("{trns}/src_bits={src_bit_depth}/src_size={input_size_in_bytes}");

    group.throughput(Throughput::Bytes(output_len as u64));
    group.sample_size(500);
    group.bench_with_input(bench_id, &input, |b, input| {
        // The output buffer and `Info` are set up once, outside of the measured loop.
        let mut output = vec![0; output_len];
        let info = input.to_info();
        b.iter(|| {
            transform_fn(input.src.as_slice(), output.as_mut_slice(), &info);
        });
    });
}
91 changes: 0 additions & 91 deletions src/utils.rs → src/adam7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,97 +2,6 @@
use std::iter::StepBy;
use std::ops::Range;

/// Splits `input` into `bit_depth`-wide values and feeds them to `func` one at a time.
///
/// `output` is walked in consecutive chunks of `channels` bytes. For every chunk, `func`
/// receives the next unpacked value (taken from the most significant bits of each input
/// byte first) together with the chunk it should fill. Trailing output bytes that do not
/// form a complete chunk are left untouched.
///
/// # Panics
///
/// Panics if `bit_depth` is not one of 1, 2, 4 or 8, or if `input` is too short to
/// produce `output.len()` bytes of output.
#[inline(always)]
pub fn unpack_bits<F>(input: &[u8], output: &mut [u8], channels: usize, bit_depth: u8, func: F)
where
    F: Fn(u8, &mut [u8]),
{
    // Only [1, 2, 4, 8] are valid bit depths
    assert!(matches!(bit_depth, 1 | 2 | 4 | 8));
    // `input` must be able to produce at least `output.len()` bytes:
    // values per input byte * channels * input length
    assert!((8 / bit_depth as usize * channels).saturating_mul(input.len()) >= output.len());

    let mut pixels = output.chunks_exact_mut(channels);
    let mut bytes = input.iter();

    // Whole bytes need no shifting or masking.
    if bit_depth == 8 {
        for (&byte, pixel) in bytes.zip(&mut pixels) {
            func(byte, pixel);
        }
        return;
    }

    // The shift walks through the sequence for the given bit depth:
    //   1 => 7, 6, 5, 4, 3, 2, 1, 0
    //   2 => 6, 4, 2, 0
    //   4 => 4, 0
    //
    // `(0..8).step_by(bit_depth.into()).rev()` doesn't always optimize well so
    // shifts are calculated instead. (2023-08, Rust 1.71)
    let step = bit_depth as i32;
    let top_shift = 8 - step;
    let mask = ((1u16 << bit_depth) - 1) as u8;

    // A negative shift means the current byte is used up; both variables get their
    // real values on the first loop iteration.
    let mut shift = -1;
    let mut byte = 0;

    for pixel in pixels {
        if shift < 0 {
            shift = top_shift;
            byte = *bytes.next().expect("input for unpack bits is not empty");
        }

        func((byte >> shift) & mask, pixel);

        shift -= step;
    }
}

/// Copies `channels`-byte pixels from `input` to `output`, appending one alpha byte each.
///
/// The alpha byte is 0 when the pixel's bytes equal the `trns` key and 0xFF otherwise
/// (including when `trns` is `None`). Processing stops as soon as either buffer runs out
/// of complete pixels.
pub fn expand_trns_line(input: &[u8], output: &mut [u8], trns: Option<&[u8]>, channels: usize) {
    let src_pixels = input.chunks_exact(channels);
    let dst_pixels = output.chunks_exact_mut(channels + 1);

    for (src, dst) in src_pixels.zip(dst_pixels) {
        let (color, alpha) = dst.split_at_mut(channels);
        color.copy_from_slice(src);
        alpha[0] = match trns {
            Some(key) if key == src => 0,
            _ => 0xFF,
        };
    }
}

/// Copies pixels with `channels` two-byte samples from `input` to `output`, appending a
/// two-byte alpha sample to each.
///
/// Both alpha bytes are 0 when the pixel's bytes equal the `trns` key and 0xFF otherwise
/// (including when `trns` is `None`). Processing stops as soon as either buffer runs out
/// of complete pixels.
pub fn expand_trns_line16(input: &[u8], output: &mut [u8], trns: Option<&[u8]>, channels: usize) {
    let src_pixels = input.chunks_exact(channels * 2);
    let dst_pixels = output.chunks_exact_mut(channels * 2 + 2);

    for (src, dst) in src_pixels.zip(dst_pixels) {
        let (color, alpha) = dst.split_at_mut(channels * 2);
        color.copy_from_slice(src);

        let alpha_byte = if trns == Some(src) { 0 } else { 0xFF };
        alpha[0] = alpha_byte;
        alpha[1] = alpha_byte;
    }
}

/// Reduces pixels with `channels` two-byte samples to one byte per sample and appends a
/// one-byte alpha value to each.
///
/// Only the first byte of every input sample is kept. The alpha byte is 0 when the
/// pixel's full (two bytes per sample) input equals the `trns` key and 0xFF otherwise
/// (including when `trns` is `None`). Processing stops as soon as either buffer runs out
/// of complete pixels.
pub fn expand_trns_and_strip_line16(
    input: &[u8],
    output: &mut [u8],
    trns: Option<&[u8]>,
    channels: usize,
) {
    let src_pixels = input.chunks_exact(channels * 2);
    let dst_pixels = output.chunks_exact_mut(channels + 1);

    for (src, dst) in src_pixels.zip(dst_pixels) {
        let (color, alpha) = dst.split_at_mut(channels);
        for (out_sample, in_sample) in color.iter_mut().zip(src.chunks_exact(2)) {
            *out_sample = in_sample[0];
        }
        alpha[0] = match trns {
            Some(key) if key == src => 0,
            _ => 0xFF,
        };
    }
}

/// This iterator iterates over the different passes of an Adam7-encoded
/// PNG image.
/// The pattern is:
Expand Down
17 changes: 17 additions & 0 deletions src/benchable_apis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,27 @@

use crate::common::BytesPerPixel;
use crate::filter::FilterType;
use crate::{BitDepth, ColorType, Info};

/// Benchmark-facing wrapper around the crate's `unfilter`.
///
/// Exists because some of the underlying items are only `pub(crate)`: `fn unfilter`,
/// `enum BytesPerPixel`. `tbpp` is converted with `BytesPerPixel::from_usize` before
/// the call is forwarded.
pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8]) {
    let bytes_per_pixel = BytesPerPixel::from_usize(usize::from(tbpp));
    crate::filter::unfilter(filter, bytes_per_pixel, previous, current)
}

pub use crate::decoder::transform::{create_transform_fn, TransformFn};

/// Builds an `Info` for an indexed-color image from raw palette and transparency data.
///
/// All fields other than the color type, bit depth, palette and transparency keep their
/// `Info::default()` values.
///
/// # Panics
///
/// Panics if `BitDepth::from_u8` does not return a value for `bit_depth`.
pub fn create_info_from_plte_trns_bitdepth<'a>(
    plte: &'a [u8],
    trns: Option<&'a [u8]>,
    bit_depth: u8,
) -> Info<'a> {
    let bit_depth = BitDepth::from_u8(bit_depth).unwrap();
    let palette = Some(plte.into());
    let trns = trns.map(|bytes| bytes.into());

    Info {
        color_type: ColorType::Indexed,
        bit_depth,
        palette,
        trns,
        ..Info::default()
    }
}
Loading
Loading