diff --git a/examples/pngcheck.rs b/examples/pngcheck.rs
index 69e95e3c..67f5167e 100644
--- a/examples/pngcheck.rs
+++ b/examples/pngcheck.rs
@@ -174,7 +174,7 @@ fn check_image<P: AsRef<Path>>(c: Config, fname: P) -> io::Result<()> {
             }
             buf = &data[..n];
         }
-        match decoder.update(buf, &mut Vec::new()) {
+        match decoder.update(buf, &mut Vec::new(), &mut 0) {
             Ok((_, ImageEnd)) => {
                 if !have_idat {
                     // This isn't beautiful. But it works.
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 3d7c89c0..60d75681 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -204,7 +204,7 @@ impl<R: Read> Decoder<R> {
         let mut buf = Vec::new();
         while self.read_decoder.info().is_none() {
             buf.clear();
-            if self.read_decoder.decode_next(&mut buf)?.is_none() {
+            if self.read_decoder.decode_next(&mut buf, &mut 0)?.is_none() {
                 return Err(DecodingError::Format(
                     FormatErrorInner::UnexpectedEof.into(),
                 ));
@@ -226,6 +226,8 @@ impl<R: Read> Decoder<R> {
             data_stream: Vec::new(),
             prev_start: 0,
             current_start: 0,
+            lookback_start: 0,
+            write_start: 0,
             transform: self.transform,
             scratch_buffer: Vec::new(),
         };
@@ -295,7 +297,7 @@ struct ReadDecoder<R: Read> {
 impl<R: Read> ReadDecoder<R> {
     /// Returns the next decoded chunk. If the chunk is an ImageData chunk, its contents are written
     /// into image_data.
-    fn decode_next(&mut self, image_data: &mut Vec<u8>) -> Result<Option<Decoded>, DecodingError> {
+    fn decode_next(&mut self, image_data: &mut Vec<u8>, out_pos: &mut usize) -> Result<Option<Decoded>, DecodingError> {
         while !self.at_eof {
             let (consumed, result) = {
                 let buf = self.reader.fill_buf()?;
@@ -304,7 +306,7 @@ impl<R: Read> ReadDecoder<R> {
                         FormatErrorInner::UnexpectedEof.into(),
                     ));
                 }
-                self.decoder.update(buf, image_data)?
+                self.decoder.update(buf, image_data, out_pos)?
             };
             self.reader.consume(consumed);
             match result {
@@ -324,7 +326,7 @@ impl<R: Read> ReadDecoder<R> {
                     FormatErrorInner::UnexpectedEof.into(),
                 ));
             }
-            let (consumed, event) = self.decoder.update(buf, &mut vec![])?;
+            let (consumed, event) = self.decoder.update(buf, &mut vec![], &mut 0)?;
             self.reader.consume(consumed);
             match event {
                 Decoded::Nothing => (),
@@ -365,6 +367,10 @@ pub struct Reader<R: Read> {
     prev_start: usize,
     /// Index in `data_stream` where the current row starts.
     current_start: usize,
+    /// Index in `data_stream` of the first byte still needed by deflate lookback.
+    lookback_start: usize,
+    /// Index in `data_stream` where new data can be written.
+    write_start: usize,
     /// Output transformations
     transform: Transformations,
     /// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference
@@ -415,7 +421,7 @@ impl<R: Read> Reader<R> {
         // know that we will stop before reading any image data from the stream. Thus pass an
         // empty buffer and assert that remains empty.
         let mut buf = Vec::new();
-        let state = self.decoder.decode_next(&mut buf)?;
+        let state = self.decoder.decode_next(&mut buf, &mut 0)?;
         assert!(buf.is_empty());
 
         match state {
@@ -514,6 +520,8 @@ impl<R: Read> Reader<R> {
         self.data_stream.clear();
         self.current_start = 0;
         self.prev_start = 0;
+        self.lookback_start = 0;
+        self.write_start = 0;
         let width = self.info().width;
         if self.info().interlaced {
             while let Some(InterlacedRow {
@@ -605,9 +613,10 @@ impl<R: Read> Reader<R> {
         self.data_stream.clear();
         self.current_start = 0;
         self.prev_start = 0;
+        self.lookback_start = 0;
         loop {
             let mut buf = Vec::new();
-            let state = self.decoder.decode_next(&mut buf)?;
+            let state = self.decoder.decode_next(&mut buf, &mut 0)?;
 
             if state.is_none() {
                 break;
@@ -749,44 +758,65 @@ impl<R: Read> Reader<R> {
     ///
     /// The scanline is filtered against the previous scanline according to the specification.
     fn next_raw_interlaced_row(&mut self, rowlen: usize) -> Result<(), DecodingError> {
-        // Read image data until we have at least one full row (but possibly more than one).
-        while self.data_stream.len() - self.current_start < rowlen {
-            if self.subframe.consumed_and_flushed {
-                return Err(DecodingError::Format(
-                    FormatErrorInner::NoMoreImageData.into(),
-                ));
-            }
+        // Read image data if we don't yet have enough data for the next row.
+        if self.lookback_start - self.current_start < rowlen {
+            let image_bytes = self
+                .subframe
+                .rowlen
+                .saturating_mul(self.subframe.height as usize);
+            let target_bytes = (256 << 10).min(image_bytes);
 
             // Clear the current buffer before appending more data.
-            if self.prev_start > 0 {
+            if self.prev_start > 0 && self.data_stream.len() < image_bytes {
                 self.data_stream.copy_within(self.prev_start.., 0);
                 self.data_stream
                     .truncate(self.data_stream.len() - self.prev_start);
+                self.write_start -= self.prev_start;
+                self.lookback_start -= self.prev_start;
                 self.current_start -= self.prev_start;
                 self.prev_start = 0;
             }
 
-            match self.decoder.decode_next(&mut self.data_stream)? {
-                Some(Decoded::ImageData) => {}
-                Some(Decoded::ImageDataFlushed) => {
-                    self.subframe.consumed_and_flushed = true;
-                }
-                None => {
+            self.data_stream.resize(self.data_stream.len().max(target_bytes), 0);
+
+            while self.lookback_start - self.current_start < target_bytes {
+                if self.subframe.consumed_and_flushed {
                     return Err(DecodingError::Format(
-                        if self.data_stream.is_empty() {
-                            FormatErrorInner::NoMoreImageData
+                        FormatErrorInner::NoMoreImageData.into(),
+                    ));
+                }
+
+                match self.decoder.decode_next(&mut self.data_stream, &mut self.write_start)? {
+                    Some(Decoded::ImageData) => {
+                        // TODO: interlaced
+                        if self.data_stream.len() >= image_bytes && matches!(self.subframe.interlace, InterlaceIter::None(_)) {
+                            self.lookback_start = self.write_start;
                         } else {
-                            FormatErrorInner::UnexpectedEndOfChunk
+                            self.lookback_start = self.write_start.saturating_sub(32 << 10);
                         }
-                        .into(),
-                    ));
+                    }
+                    Some(Decoded::ImageDataFlushed) => {
+                        self.lookback_start = self.write_start;
+                        self.subframe.consumed_and_flushed = true;
+                        break;
+                    }
+                    None => {
+                        return Err(DecodingError::Format(
+                            if self.data_stream.is_empty() {
+                                FormatErrorInner::NoMoreImageData
+                            } else {
+                                FormatErrorInner::UnexpectedEndOfChunk
+                            }
+                            .into(),
+                        ));
+                    }
+                    _ => (),
                 }
-                _ => (),
             }
         }
 
         // Get a reference to the current row and point scan_start to the next one.
-        let (prev, row) = self.data_stream.split_at_mut(self.current_start);
+        let (prev, row) = self.data_stream[..self.lookback_start].split_at_mut(self.current_start);
 
         // Unfilter the row.
         let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format(
diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs
index 584bdfaf..fb614f58 100644
--- a/src/decoder/stream.rs
+++ b/src/decoder/stream.rs
@@ -581,10 +581,11 @@ impl StreamingDecoder {
         &mut self,
         mut buf: &[u8],
         image_data: &mut Vec<u8>,
+        out_pos: &mut usize,
     ) -> Result<(usize, Decoded), DecodingError> {
         let len = buf.len();
         while !buf.is_empty() && self.state.is_some() {
-            match self.next_state(buf, image_data) {
+            match self.next_state(buf, image_data, out_pos) {
                 Ok((bytes, Decoded::Nothing)) => buf = &buf[bytes..],
                 Ok((bytes, result)) => {
                     buf = &buf[bytes..];
@@ -600,6 +601,7 @@ impl StreamingDecoder {
         &mut self,
         buf: &[u8],
         image_data: &mut Vec<u8>,
+        out_pos: &mut usize,
     ) -> Result<(usize, Decoded), DecodingError> {
         use self::State::*;
 
@@ -620,7 +622,7 @@ impl StreamingDecoder {
                     // values is that they occur fairly frequently and special-casing them results
                     // in performance gains.
                     const CONSUMED_BYTES: usize = 4;
-                    self.parse_u32(kind, &buf[0..4], image_data)
+                    self.parse_u32(kind, &buf[0..4], image_data, out_pos)
                         .map(|decoded| (CONSUMED_BYTES, decoded))
                 } else {
                     let remaining_count = 4 - accumulated_count;
@@ -641,7 +643,7 @@ impl StreamingDecoder {
                         Ok((consumed_bytes, Decoded::Nothing))
                     } else {
                         debug_assert_eq!(accumulated_count, 4);
-                        self.parse_u32(kind, &bytes, image_data)
+                        self.parse_u32(kind, &bytes, image_data, out_pos)
                             .map(|decoded| (consumed_bytes, decoded))
                     }
                 }
@@ -700,7 +702,7 @@ impl StreamingDecoder {
                 debug_assert!(type_str == IDAT || type_str == chunk::fdAT);
                 let len = std::cmp::min(buf.len(), self.current_chunk.remaining as usize);
                 let buf = &buf[..len];
-                let consumed = self.inflater.decompress(buf, image_data)?;
+                let consumed = self.inflater.decompress(buf, image_data, out_pos)?;
                 self.current_chunk.crc.update(&buf[..consumed]);
                 self.current_chunk.remaining -= consumed as u32;
                 if self.current_chunk.remaining == 0 {
@@ -718,6 +720,7 @@ impl StreamingDecoder {
         kind: U32ValueKind,
         u32_be_bytes: &[u8],
         image_data: &mut Vec<u8>,
+        out_pos: &mut usize,
     ) -> Result<Decoded, DecodingError> {
         debug_assert_eq!(u32_be_bytes.len(), 4);
         let bytes = u32_be_bytes.try_into().unwrap();
@@ -759,7 +762,7 @@ impl StreamingDecoder {
                     && (self.current_chunk.type_ == IDAT || self.current_chunk.type_ == chunk::fdAT)
                 {
                     self.current_chunk.type_ = type_str;
-                    self.inflater.finish_compressed_chunks(image_data)?;
+                    self.inflater.finish_compressed_chunks(image_data, out_pos)?;
                     self.inflater.reset();
                     self.state = Some(State::U32 {
                         kind,
diff --git a/src/decoder/zlib.rs b/src/decoder/zlib.rs
index 1693154e..6ba9fc3a 100644
--- a/src/decoder/zlib.rs
+++ b/src/decoder/zlib.rs
@@ -1,4 +1,4 @@
-use super::{stream::FormatErrorInner, DecodingError, CHUNCK_BUFFER_SIZE};
+use super::{stream::FormatErrorInner, DecodingError};
 
 use fdeflate::Decompressor;
 
@@ -8,12 +8,6 @@ pub(super) struct ZlibStream {
     state: Box<Decompressor>,
     /// If there has been a call to decompress already.
     started: bool,
-    /// Remaining buffered decoded bytes.
-    /// The decoder sometimes wants inspect some already finished bytes for further decoding. So we
-    /// keep a total of 32KB of decoded data available as long as more data may be appended.
-    out_buffer: Vec<u8>,
-    /// The first index of `out_buffer` where new data can be written.
-    out_pos: usize,
     /// The first index of `out_buffer` that hasn't yet been passed to our client
     /// (i.e. not yet appended to the `image_data` parameter of `fn decompress` or `fn
     /// finish_compressed_chunks`).
@@ -35,8 +29,6 @@ impl ZlibStream {
         ZlibStream {
             state: Box::new(Decompressor::new()),
             started: false,
-            out_buffer: Vec::new(),
-            out_pos: 0,
             read_pos: 0,
             max_total_output: usize::MAX,
             ignore_adler32: true,
@@ -45,8 +37,6 @@ impl ZlibStream {
 
     pub(crate) fn reset(&mut self) {
         self.started = false;
-        self.out_buffer.clear();
-        self.out_pos = 0;
         self.read_pos = 0;
         self.max_total_output = usize::MAX;
         *self.state = Decompressor::new();
@@ -83,6 +73,7 @@ impl ZlibStream {
         &mut self,
         data: &[u8],
         image_data: &mut Vec<u8>,
+        out_pos: &mut usize,
     ) -> Result<usize, DecodingError> {
         // There may be more data past the adler32 checksum at the end of the deflate stream. We
         // match libpng's default behavior and ignore any trailing data. In the future we may want
@@ -91,23 +82,24 @@ impl ZlibStream {
             return Ok(data.len());
         }
 
-        self.prepare_vec_for_appending();
-
         if !self.started && self.ignore_adler32 {
             self.state.ignore_adler32();
         }
 
+        if image_data.len() == *out_pos {
+            image_data.resize(image_data.len() + 1024, 0u8);
+        }
+
+        assert!(image_data.len() > *out_pos, "len: {}, out_pos: {}", image_data.len(), *out_pos);
         let (in_consumed, out_consumed) = self
             .state
-            .read(data, self.out_buffer.as_mut_slice(), self.out_pos, false)
+            .read(data, image_data, *out_pos, false)
            .map_err(|err| {
                DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
            })?;
 
         self.started = true;
-        self.out_pos += out_consumed;
-        self.transfer_finished_data(image_data);
-        self.compact_out_buffer_if_needed();
+        *out_pos += out_consumed;
 
         Ok(in_consumed)
     }
@@ -120,111 +112,27 @@ impl ZlibStream {
     pub(crate) fn finish_compressed_chunks(
         &mut self,
         image_data: &mut Vec<u8>,
+        out_pos: &mut usize,
     ) -> Result<(), DecodingError> {
         if !self.started {
             return Ok(());
         }
 
         while !self.state.is_done() {
-            self.prepare_vec_for_appending();
+            if image_data.len() == *out_pos {
+                image_data.resize(image_data.len() + 1024, 0u8);
+            }
+
+            assert!(image_data.len() > *out_pos);
             let (_in_consumed, out_consumed) = self
                 .state
-                .read(&[], self.out_buffer.as_mut_slice(), self.out_pos, true)
+                .read(&[], image_data, *out_pos, true)
                 .map_err(|err| {
                     DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
                 })?;
-
-            self.out_pos += out_consumed;
-
-            if !self.state.is_done() {
-                let transferred = self.transfer_finished_data(image_data);
-                assert!(
-                    transferred > 0 || out_consumed > 0,
-                    "No more forward progress made in stream decoding."
-                );
-                self.compact_out_buffer_if_needed();
-            }
+            *out_pos += out_consumed;
         }
 
-        self.transfer_finished_data(image_data);
-        self.out_buffer.clear();
         Ok(())
     }
-
-    /// Resize the vector to allow allocation of more data.
-    fn prepare_vec_for_appending(&mut self) {
-        // The `debug_assert` below explains why we can use `>=` instead of `>` in the condition
-        // that compares `self.out_post >= self.max_total_output` in the next `if` statement.
-        debug_assert!(!self.state.is_done());
-        if self.out_pos >= self.max_total_output {
-            // This can happen when the `max_total_output` was miscalculated (e.g.
-            // because the `IHDR` chunk was malformed and didn't match the `IDAT` chunk). In
-            // this case, let's reset `self.max_total_output` before further calculations.
-            self.max_total_output = usize::MAX;
-        }
-
-        let current_len = self.out_buffer.len();
-        let desired_len = self
-            .out_pos
-            .saturating_add(CHUNCK_BUFFER_SIZE)
-            .min(self.max_total_output);
-        if current_len >= desired_len {
-            return;
-        }
-
-        let buffered_len = self.decoding_size(self.out_buffer.len());
-        debug_assert!(self.out_buffer.len() <= buffered_len);
-        self.out_buffer.resize(buffered_len, 0u8);
-    }
-
-    fn decoding_size(&self, len: usize) -> usize {
-        // Allocate one more chunk size than currently or double the length while ensuring that the
-        // allocation is valid and that any cursor within it will be valid.
-        len
-            // This keeps the buffer size a power-of-two, required by miniz_oxide.
-            .saturating_add(CHUNCK_BUFFER_SIZE.max(len))
-            // Ensure all buffer indices are valid cursor positions.
-            // Note: both cut off and zero extension give correct results.
-            .min(u64::max_value() as usize)
-            // Ensure the allocation request is valid.
-            // TODO: maximum allocation limits?
-            .min(isize::max_value() as usize)
-            // Don't unnecessarily allocate more than `max_total_output`.
-            .min(self.max_total_output)
-    }
-
-    fn transfer_finished_data(&mut self, image_data: &mut Vec<u8>) -> usize {
-        let transferred = &self.out_buffer[self.read_pos..self.out_pos];
-        image_data.extend_from_slice(transferred);
-        self.read_pos = self.out_pos;
-        transferred.len()
-    }
-
-    fn compact_out_buffer_if_needed(&mut self) {
-        // [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that
-        // "deflate/inflate compression with a sliding window (which is an upper bound on the
-        // distances appearing in the deflate stream) of at most 32768 bytes".
-        //
-        // `fdeflate` requires that we keep this many most recently decompressed bytes in the
-        // `out_buffer` - this allows referring back to them when handling "length and distance
-        // codes" in the deflate stream).
-        const LOOKBACK_SIZE: usize = 32768;
-
-        // Compact `self.out_buffer` when "needed". Doing this conditionally helps to put an upper
-        // bound on the amortized cost of copying the data within `self.out_buffer`.
-        //
-        // TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different
-        // factor. (Early experiments seem to indicate that factor of 4 is faster than a factor of
-        // 2 and 4 * `LOOKBACK_SIZE` seems like an acceptable memory trade-off. Higher factors
-        // result in higher memory usage, but the compaction cost is lower - factor of 4 means
-        // that 1 byte gets copied during compaction for 3 decompressed bytes.)
-        if self.out_pos > LOOKBACK_SIZE * 4 {
-            // Only preserve the `lookback_buffer` and "throw away" the earlier prefix.
-            let lookback_buffer = self.out_pos.saturating_sub(LOOKBACK_SIZE)..self.out_pos;
-            let preserved_len = lookback_buffer.len();
-            self.out_buffer.copy_within(lookback_buffer, 0);
-            self.read_pos = preserved_len;
-            self.out_pos = preserved_len;
-        }
-    }
 }
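
The sketch below is not part of the patch; it is a minimal, standalone illustration of the caller-managed `out_pos` pattern that the reworked `ZlibStream::decompress` / `finish_compressed_chunks` rely on: the caller owns the output buffer and the write cursor, and `fdeflate::Decompressor::read` writes decompressed bytes directly at that cursor instead of going through the removed intermediate `out_buffer`. The `compress_to_vec` round trip and the 1024-byte growth step are illustrative assumptions, not code from this patch.

```rust
use fdeflate::Decompressor;

fn main() -> Result<(), fdeflate::DecompressionError> {
    // Build a small zlib stream to decode; the real decoder feeds IDAT/fdAT payloads instead.
    let original = vec![42u8; 100_000];
    let compressed = fdeflate::compress_to_vec(&original);

    let mut decoder = Decompressor::new();
    let mut output = Vec::new();
    let mut out_pos = 0usize; // analogous to the `out_pos` threaded through the decoder
    let mut input = &compressed[..];

    while !decoder.is_done() {
        // Grow the output buffer when the cursor reaches its end, mirroring the
        // `image_data.resize(image_data.len() + 1024, 0u8)` calls in the patch.
        if output.len() == out_pos {
            output.resize(output.len() + 1024, 0u8);
        }

        let end_of_input = input.is_empty();
        let (in_consumed, out_consumed) =
            decoder.read(input, &mut output, out_pos, end_of_input)?;
        input = &input[in_consumed..];
        out_pos += out_consumed; // advance the write cursor, as `*out_pos += out_consumed` does
    }

    output.truncate(out_pos);
    assert_eq!(output, original);
    Ok(())
}
```

The 32 KiB figure in the new reader code (`32 << 10`, `LOOKBACK_SIZE` in the removed helper) is the maximum deflate sliding-window distance cited in the PNG specification, which is why `lookback_start` trails `write_start` by at most that much while a frame is still streaming: the decompressor may still back-reference those most recently written bytes in `data_stream`.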