From 299ac7589410a4e9541d0483161abb9e455e0ec1 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sat, 15 May 2021 18:54:57 -0400 Subject: [PATCH 1/3] Specialize single-element writes to buffer copy_from_slice generally falls back to memcpy/memmove, which is much more expensive than we need to write a single element in. This saves 0.26% instructions on the diesel benchmark. --- library/proc_macro/src/bridge/buffer.rs | 15 +++++++++++++++ library/proc_macro/src/bridge/rpc.rs | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/library/proc_macro/src/bridge/buffer.rs b/library/proc_macro/src/bridge/buffer.rs index aeecbd49662..26d210a01c5 100644 --- a/library/proc_macro/src/bridge/buffer.rs +++ b/library/proc_macro/src/bridge/buffer.rs @@ -91,6 +91,21 @@ impl<T: Copy> Buffer<T> { let b = self.take(); *self = (b.extend_from_slice)(b, Slice::from(xs)); } + + pub(super) fn push(&mut self, v: T) { + // Fast path to avoid going through an FFI call. + if let Some(final_len) = self.len.checked_add(1) { + if final_len <= self.capacity { + unsafe { + *self.data.add(self.len) = v; + } + self.len = final_len; + return; + } + } + let b = self.take(); + *self = (b.extend_from_slice)(b, Slice::from(std::slice::from_ref(&v))); + } } impl Write for Buffer<u8> { diff --git a/library/proc_macro/src/bridge/rpc.rs b/library/proc_macro/src/bridge/rpc.rs index 5c2f9ec9848..ee9a2cf9a97 100644 --- a/library/proc_macro/src/bridge/rpc.rs +++ b/library/proc_macro/src/bridge/rpc.rs @@ -114,7 +114,7 @@ impl<S> DecodeMut<'_, '_, S> for () { impl<S> Encode<S> for u8 { fn encode(self, w: &mut Writer, _: &mut S) { - w.write_all(&[self]).unwrap(); + w.push(self); } } From 92b2894d313b5d514d077bf65560df29cda35d13 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sat, 15 May 2021 19:49:54 -0400 Subject: [PATCH 2/3] Switch to reserve over extend_from_slice This is a 0.15% win on diesel.
--- library/proc_macro/src/bridge/buffer.rs | 46 +++++++++++-------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/library/proc_macro/src/bridge/buffer.rs b/library/proc_macro/src/bridge/buffer.rs index 26d210a01c5..a2030b9b8bf 100644 --- a/library/proc_macro/src/bridge/buffer.rs +++ b/library/proc_macro/src/bridge/buffer.rs @@ -39,7 +39,7 @@ pub struct Buffer<T: Copy> { data: *mut T, len: usize, capacity: usize, - extend_from_slice: extern "C" fn(Buffer<T>, Slice<'_, T>) -> Buffer<T>, + reserve: extern "C" fn(Buffer<T>, usize) -> Buffer<T>, drop: extern "C" fn(Buffer<T>), } @@ -79,32 +79,28 @@ impl<T: Copy> Buffer<T> { } pub(super) fn extend_from_slice(&mut self, xs: &[T]) { - // Fast path to avoid going through an FFI call. - if let Some(final_len) = self.len.checked_add(xs.len()) { - if final_len <= self.capacity { - let dst = unsafe { slice::from_raw_parts_mut(self.data, self.capacity) }; - dst[self.len..][..xs.len()].copy_from_slice(xs); - self.len = final_len; - return; - } + if xs.len() > self.capacity.wrapping_sub(self.len) { + let b = self.take(); + *self = (b.reserve)(b, xs.len()); + } + unsafe { + xs.as_ptr().copy_to_nonoverlapping(self.data.add(self.len), xs.len()); + self.len += xs.len(); } - let b = self.take(); - *self = (b.extend_from_slice)(b, Slice::from(xs)); } pub(super) fn push(&mut self, v: T) { - // Fast path to avoid going through an FFI call. - if let Some(final_len) = self.len.checked_add(1) { - if final_len <= self.capacity { - unsafe { - *self.data.add(self.len) = v; - } - self.len = final_len; - return; - } + // The code here is taken from Vec::push, and we know that reserve() + // will panic if we're exceeding isize::MAX bytes and so there's no need + // to check for overflow.
+ if self.len == self.capacity { + let b = self.take(); + *self = (b.reserve)(b, 1); + } + unsafe { + *self.data.add(self.len) = v; + self.len += 1; } - let b = self.take(); - *self = (b.extend_from_slice)(b, Slice::from(std::slice::from_ref(&v))); } } @@ -146,9 +142,9 @@ impl<T: Copy> From<Vec<T>> for Buffer<T> { } } - extern "C" fn extend_from_slice<T: Copy>(b: Buffer<T>, xs: Slice<'_, T>) -> Buffer<T> { + extern "C" fn reserve<T: Copy>(b: Buffer<T>, additional: usize) -> Buffer<T> { let mut v = to_vec(b); - v.extend_from_slice(&xs); + v.reserve(additional); Buffer::from(v) } @@ -156,6 +152,6 @@ impl<T: Copy> From<Vec<T>> for Buffer<T> { mem::drop(to_vec(b)); } - Buffer { data, len, capacity, extend_from_slice, drop } + Buffer { data, len, capacity, reserve, drop } } } From 8c2080886fa46434e097c503d3d1ee9309eadc7d Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sat, 15 May 2021 21:39:45 -0400 Subject: [PATCH 3/3] Write primitive types via array buffers This allows a more efficient implementation (avoiding a fallback to memmove, which is not optimal for short writes). This saves 0.29% on diesel. --- library/proc_macro/src/bridge/buffer.rs | 17 ++++++++++++++++- library/proc_macro/src/bridge/rpc.rs | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/library/proc_macro/src/bridge/buffer.rs b/library/proc_macro/src/bridge/buffer.rs index a2030b9b8bf..717201aef10 100644 --- a/library/proc_macro/src/bridge/buffer.rs +++ b/library/proc_macro/src/bridge/buffer.rs @@ -78,8 +78,23 @@ impl<T: Copy> Buffer<T> { mem::take(self) } + // We have the array method separate from extending from a slice. This is + // because in the case of small arrays, codegen can be more efficient + // (avoiding a memmove call). With extend_from_slice, LLVM at least + // currently is not able to make that optimization.
+ pub(super) fn extend_from_array<const N: usize>(&mut self, xs: &[T; N]) { + if xs.len() > (self.capacity - self.len) { + let b = self.take(); + *self = (b.reserve)(b, xs.len()); + } + unsafe { + xs.as_ptr().copy_to_nonoverlapping(self.data.add(self.len), xs.len()); + self.len += xs.len(); + } + } + pub(super) fn extend_from_slice(&mut self, xs: &[T]) { - if xs.len() > self.capacity.wrapping_sub(self.len) { + if xs.len() > (self.capacity - self.len) { let b = self.take(); *self = (b.reserve)(b, xs.len()); } diff --git a/library/proc_macro/src/bridge/rpc.rs b/library/proc_macro/src/bridge/rpc.rs index ee9a2cf9a97..588e6ded0f4 100644 --- a/library/proc_macro/src/bridge/rpc.rs +++ b/library/proc_macro/src/bridge/rpc.rs @@ -27,7 +27,7 @@ macro_rules! rpc_encode_decode { (le $ty:ty) => { impl<S> Encode<S> for $ty { fn encode(self, w: &mut Writer, _: &mut S) { - w.write_all(&self.to_le_bytes()).unwrap(); + w.extend_from_array(&self.to_le_bytes()); } }