Skip to content

Commit

Permalink
Remove RGBA codepaths using std::simd types since they regress perfor…
Browse files Browse the repository at this point in the history
…mance on Windows, see image-rs#567
  • Loading branch information
Shnatsel committed Feb 16, 2025
1 parent c839711 commit 19dcfc8
Showing 1 changed file with 0 additions and 71 deletions.
71 changes: 0 additions & 71 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,6 @@ mod simd {
out.into()
}

/// Lane-wise Paeth predictor that works directly on `u8` lanes.
///
/// Yields the same values as `simd::paeth_predictor`, but never widens the
/// SIMD elements to `i16` along the way. Each lane is computed independently
/// by the scalar `filter_paeth_stbi` helper of the parent module.
fn paeth_predictor_u8<const N: usize>(
    a: Simd<u8, N>,
    b: Simd<u8, N>,
    c: Simd<u8, N>,
) -> Simd<u8, N>
where
    LaneCount<N>: SupportedLaneCount,
{
    let mut lanes = [0; N];
    for (lane, slot) in lanes.iter_mut().enumerate() {
        *slot = super::filter_paeth_stbi(a[lane].into(), b[lane].into(), c[lane].into());
    }
    Simd::from_array(lanes)
}

/// Memory of previous pixels (as needed to unfilter `Filter::Paeth`).
/// See also https://www.w3.org/TR/png/#filter-byte-positions
#[derive(Default)]
Expand Down Expand Up @@ -98,24 +81,6 @@ mod simd {
state.a = x.cast::<i16>();
}

/// Performs one Paeth unfiltering step while staying in `u8` lanes.
///
/// This is the `u8`-only counterpart of `simd::paeth_step`: `x` is replaced
/// by `x + paeth_predictor_u8(state.a, b, state.c)`, and `state` is then
/// advanced so that the following step sees `b` as `c` and the freshly
/// reconstructed value as `a`.
fn paeth_step_u8<const N: usize>(
    state: &mut PaethState<u8, N>,
    b: Simd<u8, N>,
    x: &mut Simd<u8, N>,
) where
    LaneCount<N>: SupportedLaneCount,
{
    // Reconstruct the current pixel from its filtered value and the prediction.
    let predicted = paeth_predictor_u8(state.a, b, state.c);
    let reconstructed = *x + predicted;
    *x = reconstructed;

    // Roll the state forward for the next pixel.
    state.a = reconstructed;
    state.c = b;
}

/// Builds a `u8x4` from the first three bytes of `src`; the fourth lane is zero.
///
/// Panics if `src` holds fewer than three bytes.
fn load3(src: &[u8]) -> u8x4 {
    let first = src[0];
    let second = src[1];
    let third = src[2];
    u8x4::from_array([first, second, third, 0])
}
Expand Down Expand Up @@ -155,30 +120,6 @@ mod simd {
store3(x, curr_row);
}

/// Reverses `Filter::Paeth` for `BytesPerPixel::Four` and `BytesPerPixel::Eight`.
///
/// The Paeth predictor is evaluated purely on `Simd<u8, N>` values; no
/// intermediate `Simd<i16, N>` is created, which avoids the small cost of
/// converting between the two element types.
///
/// # Panics
///
/// Panics unless `N` is 4 or 8. In builds with debug assertions enabled it
/// also panics when the two rows differ in length or when the row length is
/// not a multiple of `N`.
pub fn unfilter_paeth_u8<const N: usize>(prev_row: &[u8], curr_row: &mut [u8])
where
    LaneCount<N>: SupportedLaneCount,
{
    debug_assert_eq!(prev_row.len(), curr_row.len());
    debug_assert_eq!(prev_row.len() % N, 0);
    assert!(matches!(N, 4 | 8));

    let mut state = PaethState::<u8, N>::default();
    let above_pixels = prev_row.chunks_exact(N);
    let current_pixels = curr_row.chunks_exact_mut(N);

    for (above, pixel) in above_pixels.zip(current_pixels) {
        let b = Simd::<u8, N>::from_slice(above);
        let mut x = Simd::<u8, N>::from_slice(pixel);

        paeth_step_u8(&mut state, b, &mut x);

        // `pixel` is exactly `N` bytes long, so the whole vector is written back.
        pixel.copy_from_slice(&x.to_array());
    }
}

/// Builds a `u8x8` from the first six bytes of `src`; the last two lanes are zero.
///
/// Panics if `src` holds fewer than six bytes.
fn load6(src: &[u8]) -> u8x8 {
    let mut lanes = [0u8; 8];
    for (lane, slot) in lanes.iter_mut().take(6).enumerate() {
        *slot = src[lane];
    }
    u8x8::from_array(lanes)
}
Expand Down Expand Up @@ -804,12 +745,6 @@ pub(crate) fn unfilter(
}
}
BytesPerPixel::Four => {
#[cfg(all(feature = "unstable", target_arch = "x86_64"))]
{
simd::unfilter_paeth_u8::<4>(previous, current);
return;
}

let mut a_bpp = [0; 4];
let mut c_bpp = [0; 4];
for (chunk, b_bpp) in current.chunks_exact_mut(4).zip(previous.chunks_exact(4))
Expand Down Expand Up @@ -860,12 +795,6 @@ pub(crate) fn unfilter(
}
}
BytesPerPixel::Eight => {
#[cfg(all(feature = "unstable", target_arch = "x86_64"))]
{
simd::unfilter_paeth_u8::<8>(previous, current);
return;
}

let mut a_bpp = [0; 8];
let mut c_bpp = [0; 8];
for (chunk, b_bpp) in current.chunks_exact_mut(8).zip(previous.chunks_exact(8))
Expand Down

0 comments on commit 19dcfc8

Please sign in to comment.