Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid 32kB decompression lag + compact less often. #447

Merged
merged 1 commit into from
Jan 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 43 additions & 8 deletions src/decoder/zlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@ pub(super) struct ZlibStream {
/// The decoder sometimes wants inspect some already finished bytes for further decoding. So we
/// keep a total of 32KB of decoded data available as long as more data may be appended.
out_buffer: Vec<u8>,
/// The cursor position in the output stream as a buffer index.
/// The first index of `out_buffer` where new data can be written.
out_pos: usize,
/// The first index of `out_buffer` that hasn't yet been passed to our client
/// (i.e. not yet appended to the `image_data` parameter of `fn decompress` or `fn
/// finish_compressed_chunks`).
read_pos: usize,
/// Limit on how many bytes can be decompressed in total. This field is mostly used for
/// performance optimizations (e.g. to avoid allocating and zeroing out large buffers when only
/// a small image is being decoded).
Expand All @@ -33,6 +37,7 @@ impl ZlibStream {
started: false,
out_buffer: Vec::new(),
out_pos: 0,
read_pos: 0,
max_total_output: usize::MAX,
ignore_adler32: true,
}
Expand All @@ -42,6 +47,7 @@ impl ZlibStream {
self.started = false;
self.out_buffer.clear();
self.out_pos = 0;
self.read_pos = 0;
self.max_total_output = usize::MAX;
*self.state = Decompressor::new();
}
Expand Down Expand Up @@ -94,6 +100,7 @@ impl ZlibStream {
self.started = true;
self.out_pos += out_consumed;
self.transfer_finished_data(image_data);
self.compact_out_buffer_if_needed();

Ok(in_consumed)
}
Expand Down Expand Up @@ -128,11 +135,12 @@ impl ZlibStream {
transferred > 0 || out_consumed > 0,
"No more forward progress made in stream decoding."
);
self.compact_out_buffer_if_needed();
}
}

self.out_buffer.truncate(self.out_pos);
image_data.append(&mut self.out_buffer);
self.transfer_finished_data(image_data);
self.out_buffer.clear();
Ok(())
}

Expand Down Expand Up @@ -179,10 +187,37 @@ impl ZlibStream {
}

fn transfer_finished_data(&mut self, image_data: &mut Vec<u8>) -> usize {
let safe = self.out_pos.saturating_sub(CHUNCK_BUFFER_SIZE);
// TODO: allocation limits.
image_data.extend(self.out_buffer.drain(..safe));
self.out_pos -= safe;
safe
let transferred = &self.out_buffer[self.read_pos..self.out_pos];
image_data.extend_from_slice(transferred);
self.read_pos = self.out_pos;
transferred.len()
}

fn compact_out_buffer_if_needed(&mut self) {
// [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that
// "deflate/inflate compression with a sliding window (which is an upper bound on the
// distances appearing in the deflate stream) of at most 32768 bytes".
//
// `fdeflate` requires that we keep this many most recently decompressed bytes in the
// `out_buffer` - this allows referring back to them when handling "length and distance
// codes" in the deflate stream).
const LOOKBACK_SIZE: usize = 32768;

// Compact `self.out_buffer` when "needed". Doing this conditionally helps to put an upper
// bound on the amortized cost of copying the data within `self.out_buffer`.
//
// TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different
// factor. (Early experiments seem to indicate that factor of 4 is faster than a factor of
// 2 and 4 * `LOOKBACK_SIZE` seems like an acceptable memory trade-off. Higher factors
// result in higher memory usage, but the compaction cost is lower - factor of 4 means
// that 1 byte gets copied during compaction for 3 decompressed bytes.)
if self.out_pos > LOOKBACK_SIZE * 4 {
// Only preserve the `lookback_buffer` and "throw away" the earlier prefix.
let lookback_buffer = self.out_pos.saturating_sub(LOOKBACK_SIZE)..self.out_pos;
let preserved_len = lookback_buffer.len();
self.out_buffer.copy_within(lookback_buffer, 0);
self.read_pos = preserved_len;
self.out_pos = preserved_len;
}
}
}
Loading