Skip to content

Commit

Permalink
Add multiversioning for filtering and make it an optional, opt-in feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Shnatsel committed Sep 29, 2024
1 parent 3a7a70d commit 03be367
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ crc32fast = "1.2.0"
fdeflate = "0.3.3"
flate2 = "1.0.11"
miniz_oxide = { version = "0.8", features = ["simd"] }
multiversion = "0.7.4"
multiversion = { version = "0.7.4", optional = true }

[dev-dependencies]
byteorder = "1.5.0"
Expand All @@ -41,6 +41,7 @@ term = "0.7"
[features]
unstable = []
benchmarks = []
multiversioning = ["multiversion"]

[[bench]]
path = "benches/decoder.rs"
Expand Down
25 changes: 17 additions & 8 deletions src/filter.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use core::convert::TryInto;

#[cfg(feature = "multiversioning")]
use multiversion::multiversion;

use crate::common::BytesPerPixel;
Expand All @@ -14,6 +15,7 @@ mod simd {
use std::simd::cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd};
use std::simd::num::{SimdInt, SimdUint};
use std::simd::{u8x4, u8x8, LaneCount, Simd, SimdElement, SupportedLaneCount};
#[cfg(feature = "multiversioning")]
use multiversion::multiversion;

/// This is an equivalent of the `PaethPredictor` function from
Expand Down Expand Up @@ -170,10 +172,10 @@ mod simd {
dest[0..3].copy_from_slice(&src.to_array()[0..3])
}

#[multiversion(targets(
#[cfg_attr(feature = "multiversioning", multiversion(targets(
"x86_64+sse+sse2+sse3+sse4.1+ssse3", // SSE4.1 is enough because our vectors are 128bit
"arm+neon", // 32-bit ARM only; 64-bit always has NEON
))]
)))]
/// Undoes `FilterType::Paeth` for `BytesPerPixel::Three`.
pub fn unfilter_paeth3(mut prev_row: &[u8], mut curr_row: &mut [u8]) {
debug_assert_eq!(prev_row.len(), curr_row.len());
Expand Down Expand Up @@ -205,10 +207,10 @@ mod simd {
store3(x, curr_row);
}

#[multiversion(targets(
#[cfg_attr(feature = "multiversioning", multiversion(targets(
"x86_64+sse+sse2+sse3+sse4.1+ssse3", // SSE4.1 is enough because our vectors are 128bit
"arm+neon", // 32-bit ARM only; 64-bit always has NEON
))]
)))]
/// Undoes `FilterType::Paeth` for `BytesPerPixel::Four` and `BytesPerPixel::Eight`.
///
/// This function calculates the Paeth predictor entirely in `Simd<u8, N>`
Expand Down Expand Up @@ -241,10 +243,10 @@ mod simd {
dest[0..6].copy_from_slice(&src.to_array()[0..6])
}

#[multiversion(targets(
#[cfg_attr(feature = "multiversioning", multiversion(targets(
"x86_64+sse+sse2+sse3+sse4.1+ssse3", // x86-64-v2, higher levels provide no benefit
"arm+neon", // 32-bit ARM only; 64-bit always has NEON
))]
)))]
/// Undoes `FilterType::Paeth` for `BytesPerPixel::Six`.
pub fn unfilter_paeth6(mut prev_row: &[u8], mut curr_row: &mut [u8]) {
debug_assert_eq!(prev_row.len(), curr_row.len());
Expand Down Expand Up @@ -396,10 +398,10 @@ fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
}
}

#[multiversion(targets(
#[cfg_attr(feature = "multiversioning", multiversion(targets(
"x86_64+sse+sse2+sse3+sse4.1+ssse3", // SSE4.1 is enough because our vectors are 128bit
"arm+neon", // 32-bit ARM only; 64-bit always has NEON
))]
)))]
pub(crate) fn unfilter(
mut filter: FilterType,
tbpp: BytesPerPixel,
Expand Down Expand Up @@ -912,6 +914,13 @@ pub(crate) fn unfilter(
}
}

#[cfg_attr(feature = "multiversioning", multiversion(targets(
// SSE4.1 only gives a +15% boost to Paeth, which is not worth the code bloat.
// AVX regresses the fast filters but speeds up the slow ones, adding another +10% to Paeth; probably not worth it either.
// AVX2 speeds up every filter and more than doubles Paeth performance, so it is the x86 level we target.
"x86_64+sse+sse2+sse3+sse4.1+sse4.2+ssse3+avx+avx2+fma",
"arm+neon", // 32-bit ARM only; 64-bit always has NEON
)))]
fn filter_internal(
method: FilterType,
bpp: usize,
Expand Down

0 comments on commit 03be367

Please sign in to comment.