From 96e361f6f4319782159164ceac6e1d660e383fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Vicente=20Milack?= Date: Mon, 4 Mar 2019 10:37:15 -0300 Subject: [PATCH 1/2] Fix buffer invalidation for BufRead There are two moments when a BufRead object needs to empty it's internal buffer: - In a seek call; - In a read call when all data in the internal buffer had been already consumed and the output buffer has a greater or equal size than the internal buffer. In both cases, the buffer was not being properly emptied, but only marked as consumed (self.pos = self.cap). That should be no problem if the inner reader is only Read, but if it is Seek as well, then it's possible to access the data in the buffer by using the seek_relative method. In order to prevent this from happening, both self.pos and self.cap should be set to 0. Two test cases were added to detect that failure: - test_buffered_reader_invalidated_after_read - test_buffered_reader_invalidated_after_seek Both tests are very similar to each other. The inner reader contains the following data: [5, 6, 7, 0, 1, 2, 3, 4]. The buffer capacity is 3 bytes. - First, we call fill_buffer, which loads [5, 6, 7] into the internal buffer, and then consume those 3 bytes. - Then we either read the 5 remaining bytes in a single read call or we move to the end of the stream by calling seek. In both cases the buffer should be emptied to prevent the previous data [5, 6, 7] from being read. - We now call seek_relative(-2) and read two bytes, which should give us the last 2 bytes of the stream: [3, 4]. Before this commit, the the seek_relative method would consider that we're still in the range of the internal buffer, so instead of fetching data from the inner reader, it would return the two last bytes that were incorrectly still in the buffer: [6, 7]. Therefore, the test would fail. Now, when seek_relative is called the buffer is empty. So the expected data [3, 4] is fetched from the inner reader and the test passes. --- src/libstd/io/buffered.rs | 45 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs index 559a54d3c8aca..365b1e5974737 100644 --- a/src/libstd/io/buffered.rs +++ b/src/libstd/io/buffered.rs @@ -225,6 +225,9 @@ impl Read for BufReader { // (larger than our internal buffer), bypass our internal buffer // entirely. if self.pos == self.cap && buf.len() >= self.buf.len() { + // Empty the buffer + self.cap = 0; + self.pos = 0; return self.inner.read(buf); } let nread = { @@ -323,14 +326,18 @@ impl Seek for BufReader { } else { // seek backwards by our remainder, and then by the offset self.inner.seek(SeekFrom::Current(-remainder))?; - self.pos = self.cap; // empty the buffer + // Empty the buffer + self.cap = 0; + self.pos = 0; result = self.inner.seek(SeekFrom::Current(n))?; } } else { // Seeking with Start/End doesn't care about our buffer length. result = self.inner.seek(pos)?; } - self.pos = self.cap; // empty the buffer + // Empty the buffer + self.cap = 0; + self.pos = 0; Ok(result) } } @@ -1066,6 +1073,40 @@ mod tests { assert_eq!(reader.fill_buf().ok(), Some(&[2, 3][..])); } + #[test] + fn test_buffered_reader_invalidated_after_read() { + let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4]; + let mut reader = BufReader::with_capacity(3, io::Cursor::new(inner)); + + assert_eq!(reader.fill_buf().ok(), Some(&[5, 6, 7][..])); + reader.consume(3); + + let mut buffer = [0, 0, 0, 0, 0]; + assert_eq!(reader.read(&mut buffer).ok(), Some(5)); + assert_eq!(buffer, [0, 1, 2, 3, 4]); + + assert!(reader.seek_relative(-2).is_ok()); + let mut buffer = [0, 0]; + assert_eq!(reader.read(&mut buffer).ok(), Some(2)); + assert_eq!(buffer, [3, 4]); + } + + #[test] + fn test_buffered_reader_invalidated_after_seek() { + let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4]; + let mut reader = BufReader::with_capacity(3, io::Cursor::new(inner)); + + assert_eq!(reader.fill_buf().ok(), Some(&[5, 6, 7][..])); + reader.consume(3); + + assert!(reader.seek(SeekFrom::Current(5)).is_ok()); + + assert!(reader.seek_relative(-2).is_ok()); + let mut buffer = [0, 0]; + assert_eq!(reader.read(&mut buffer).ok(), Some(2)); + assert_eq!(buffer, [3, 4]); + } + #[test] fn test_buffered_reader_seek_underflow() { // gimmick reader that yields its position modulo 256 for each byte From c36d91c5cc446a4688186be1071e0e139ba9d38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Vicente=20Milack?= Date: Wed, 6 Mar 2019 16:22:28 -0300 Subject: [PATCH 2/2] Fix buffer invalidation at BufReader.read_vectored --- src/libstd/io/buffered.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs index 365b1e5974737..fd4b582d7f2da 100644 --- a/src/libstd/io/buffered.rs +++ b/src/libstd/io/buffered.rs @@ -191,6 +191,13 @@ impl BufReader { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn into_inner(self) -> R { self.inner } + + /// Invalidates all data in the internal buffer. + #[inline] + fn discard_buffer(&mut self) { + self.pos = 0; + self.cap = 0; + } } impl BufReader { @@ -225,9 +232,7 @@ impl Read for BufReader { // (larger than our internal buffer), bypass our internal buffer // entirely. if self.pos == self.cap && buf.len() >= self.buf.len() { - // Empty the buffer - self.cap = 0; - self.pos = 0; + self.discard_buffer(); return self.inner.read(buf); } let nread = { @@ -241,6 +246,7 @@ impl Read for BufReader { fn read_vectored(&mut self, bufs: &mut [IoVecMut<'_>]) -> io::Result { let total_len = bufs.iter().map(|b| b.len()).sum::(); if self.pos == self.cap && total_len >= self.buf.len() { + self.discard_buffer(); return self.inner.read_vectored(bufs); } let nread = { @@ -326,18 +332,14 @@ impl Seek for BufReader { } else { // seek backwards by our remainder, and then by the offset self.inner.seek(SeekFrom::Current(-remainder))?; - // Empty the buffer - self.cap = 0; - self.pos = 0; + self.discard_buffer(); result = self.inner.seek(SeekFrom::Current(n))?; } } else { // Seeking with Start/End doesn't care about our buffer length. result = self.inner.seek(pos)?; } - // Empty the buffer - self.cap = 0; - self.pos = 0; + self.discard_buffer(); Ok(result) } }