Rename and document the new BufReader internals

This commit is contained in:
Ben Kimock 2022-07-24 12:50:02 -04:00
parent b9497be7d0
commit 5e5ce4327a
2 changed files with 33 additions and 29 deletions

View file

@ -238,7 +238,7 @@ impl<R: Seek> BufReader<R> {
return Ok(());
}
} else if let Some(new_pos) = pos.checked_add(offset as u64) {
if new_pos <= self.buf.cap() as u64 {
if new_pos <= self.buf.filled() as u64 {
self.buf.consume(offset as usize);
return Ok(());
}
@ -254,7 +254,7 @@ impl<R: Read> Read for BufReader<R> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.buf.pos() == self.buf.cap() && buf.len() >= self.capacity() {
if self.buf.pos() == self.buf.filled() && buf.len() >= self.capacity() {
self.discard_buffer();
return self.inner.read(buf);
}
@ -270,7 +270,7 @@ impl<R: Read> Read for BufReader<R> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.buf.pos() == self.buf.cap() && buf.remaining() >= self.capacity() {
if self.buf.pos() == self.buf.filled() && buf.remaining() >= self.capacity() {
self.discard_buffer();
return self.inner.read_buf(buf);
}
@ -301,7 +301,7 @@ impl<R: Read> Read for BufReader<R> {
fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
if self.buf.pos() == self.buf.cap() && total_len >= self.capacity() {
if self.buf.pos() == self.buf.filled() && total_len >= self.capacity() {
self.discard_buffer();
return self.inner.read_vectored(bufs);
}
@ -385,7 +385,7 @@ where
.field("reader", &self.inner)
.field(
"buffer",
&format_args!("{}/{}", self.buf.cap() - self.buf.pos(), self.capacity()),
&format_args!("{}/{}", self.buf.filled() - self.buf.pos(), self.capacity()),
)
.finish()
}
@ -418,7 +418,7 @@ impl<R: Seek> Seek for BufReader<R> {
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
let result: u64;
if let SeekFrom::Current(n) = pos {
let remainder = (self.buf.cap() - self.buf.pos()) as i64;
let remainder = (self.buf.filled() - self.buf.pos()) as i64;
// it should be safe to assume that remainder fits within an i64 as the alternative
// means we managed to allocate 8 exbibytes and that's absurd.
// But it's not out of the realm of possibility for some weird underlying reader to
@ -476,7 +476,7 @@ impl<R: Seek> Seek for BufReader<R> {
/// }
/// ```
fn stream_position(&mut self) -> io::Result<u64> {
let remainder = (self.buf.cap() - self.buf.pos()) as u64;
let remainder = (self.buf.filled() - self.buf.pos()) as u64;
self.inner.stream_position().map(|pos| {
pos.checked_sub(remainder).expect(
"overflow when subtracting remaining buffer size from inner stream position",

View file

@ -1,27 +1,39 @@
///! An encapsulation of `BufReader`'s buffer management logic.
///
/// This module factors out the basic functionality of `BufReader` in order to protect two core
/// invariants:
/// * `filled` bytes of `buf` are always initialized
/// * `pos` is always <= `filled`
/// Since this module encapsulates the buffer management logic, we can ensure that the range
/// `pos..filled` is always a valid index into the initialized region of the buffer. This means
/// that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so
/// without encountering any runtime bounds checks.
use crate::cmp;
use crate::io::{self, Read, ReadBuf};
use crate::mem::MaybeUninit;
pub struct Buffer {
// The buffer.
buf: Box<[MaybeUninit<u8>]>,
// The current seek offset into `buf`, must always be <= `filled`.
pos: usize,
cap: usize,
init: usize,
// Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are
// initialized with bytes from a read.
filled: usize,
}
impl Buffer {
#[inline]
pub fn with_capacity(capacity: usize) -> Self {
let buf = Box::new_uninit_slice(capacity);
Self { buf, pos: 0, cap: 0, init: 0 }
Self { buf, pos: 0, filled: 0 }
}
#[inline]
pub fn buffer(&self) -> &[u8] {
// SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
// Additionally, both self.pos and self.cap are valid and and self.cap => self.pos, and
// SAFETY: self.pos and self.cap are valid, and self.cap => self.pos, and
// that region is initialized because those are all invariants of this type.
unsafe { MaybeUninit::slice_assume_init_ref(&self.buf.get_unchecked(self.pos..self.cap)) }
unsafe { MaybeUninit::slice_assume_init_ref(self.buf.get_unchecked(self.pos..self.filled)) }
}
#[inline]
@ -30,8 +42,8 @@ impl Buffer {
}
#[inline]
pub fn cap(&self) -> usize {
self.cap
pub fn filled(&self) -> usize {
self.filled
}
#[inline]
@ -42,12 +54,12 @@ impl Buffer {
#[inline]
pub fn discard_buffer(&mut self) {
self.pos = 0;
self.cap = 0;
self.filled = 0;
}
#[inline]
pub fn consume(&mut self, amt: usize) {
self.pos = cmp::min(self.pos + amt, self.cap);
self.pos = cmp::min(self.pos + amt, self.filled);
}
#[inline]
@ -58,25 +70,17 @@ impl Buffer {
#[inline]
pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> {
// If we've reached the end of our internal buffer then we need to fetch
// some more data from the underlying reader.
// some more data from the reader.
// Branch using `>=` instead of the more correct `==`
// to tell the compiler that the pos..cap slice is always valid.
if self.pos >= self.cap {
debug_assert!(self.pos == self.cap);
if self.pos >= self.filled {
debug_assert!(self.pos == self.filled);
let mut readbuf = ReadBuf::uninit(&mut self.buf);
// SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
// from the last time this function was called
unsafe {
readbuf.assume_init(self.init);
}
reader.read_buf(&mut readbuf)?;
self.cap = readbuf.filled_len();
self.init = readbuf.initialized_len();
self.filled = readbuf.filled_len();
self.pos = 0;
}
Ok(self.buffer())