From ec95d259e41a9da382cf0b35769f71fe23b49986 Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Thu, 23 Nov 2023 10:25:25 +0000 Subject: [PATCH 1/6] simplify codegen for trivially droppable types --- library/alloc/src/collections/vec_deque/drain.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/library/alloc/src/collections/vec_deque/drain.rs b/library/alloc/src/collections/vec_deque/drain.rs index 0be274a3822..7d40ce65ad7 100644 --- a/library/alloc/src/collections/vec_deque/drain.rs +++ b/library/alloc/src/collections/vec_deque/drain.rs @@ -96,8 +96,9 @@ impl Drop for Drain<'_, T, A> { struct DropGuard<'r, 'a, T, A: Allocator>(&'r mut Drain<'a, T, A>); impl<'r, 'a, T, A: Allocator> Drop for DropGuard<'r, 'a, T, A> { + #[inline] fn drop(&mut self) { - if self.0.remaining != 0 { + if mem::needs_drop::() && self.0.remaining != 0 { unsafe { // SAFETY: We just checked that `self.remaining != 0`. let (front, back) = self.0.as_slices(); @@ -158,7 +159,7 @@ impl Drop for Drain<'_, T, A> { } let guard = DropGuard(self); - if guard.0.remaining != 0 { + if mem::needs_drop::() && guard.0.remaining != 0 { unsafe { // SAFETY: We just checked that `self.remaining != 0`. let (front, back) = guard.0.as_slices(); From 500ef678770d9a0840e913ec02fab801ea455187 Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Thu, 23 Nov 2023 10:41:35 +0000 Subject: [PATCH 2/6] reduce amount of math --- .../alloc/src/collections/vec_deque/drain.rs | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/library/alloc/src/collections/vec_deque/drain.rs b/library/alloc/src/collections/vec_deque/drain.rs index 7d40ce65ad7..aa5cc0fb1ce 100644 --- a/library/alloc/src/collections/vec_deque/drain.rs +++ b/library/alloc/src/collections/vec_deque/drain.rs @@ -27,8 +27,8 @@ pub struct Drain< drain_len: usize, // index into the logical array, not the physical one (always lies in [0..deque.len)) idx: usize, - // number of elements after the drain range - tail_len: usize, + // number of elements remaining after dropping the drain + new_len: usize, remaining: usize, // Needed to make Drain covariant over T _marker: PhantomData<&'a T>, @@ -41,12 +41,12 @@ impl<'a, T, A: Allocator> Drain<'a, T, A> { drain_len: usize, ) -> Self { let orig_len = mem::replace(&mut deque.len, drain_start); - let tail_len = orig_len - drain_start - drain_len; + let new_len = orig_len - drain_len; Drain { deque: NonNull::from(deque), drain_len, idx: drain_start, - tail_len, + new_len, remaining: drain_len, _marker: PhantomData, } @@ -79,7 +79,7 @@ impl fmt::Debug for Drain<'_, T, A> { f.debug_tuple("Drain") .field(&self.drain_len) .field(&self.idx) - .field(&self.tail_len) + .field(&self.new_len) .field(&self.remaining) .finish() } @@ -111,18 +111,16 @@ impl Drop for Drain<'_, T, A> { let drain_start = source_deque.len(); let drain_len = self.0.drain_len; - let drain_end = drain_start + drain_len; - - let orig_len = self.0.tail_len + drain_end; + let new_len = self.0.new_len; if T::IS_ZST { // no need to copy around any memory if T is a ZST - source_deque.len = orig_len - drain_len; + source_deque.len = new_len; return; } let head_len = drain_start; - let tail_len = self.0.tail_len; + let tail_len = new_len - head_len; match (head_len, tail_len) { (0, 0) => { @@ -131,10 +129,10 @@ impl Drop for Drain<'_, T, A> { } (0, _) => { source_deque.head = source_deque.to_physical_idx(drain_len); - source_deque.len = orig_len - drain_len; + source_deque.len = new_len; } (_, 0) => { - source_deque.len = orig_len - drain_len; + source_deque.len = new_len; } _ => unsafe { if head_len <= tail_len { @@ -144,14 +142,14 @@ impl Drop for Drain<'_, T, A> { head_len, ); source_deque.head = source_deque.to_physical_idx(drain_len); - source_deque.len = orig_len - drain_len; + source_deque.len = new_len; } else { source_deque.wrap_copy( source_deque.to_physical_idx(head_len + drain_len), source_deque.to_physical_idx(head_len), tail_len, ); - source_deque.len = orig_len - drain_len; + source_deque.len = new_len; } }, } From 1e3849aed010c3ff2a4c7b7b644ca67d087ab4c1 Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Thu, 23 Nov 2023 16:03:21 +0000 Subject: [PATCH 3/6] reduce branchiness --- .../alloc/src/collections/vec_deque/drain.rs | 53 ++++++++----------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/library/alloc/src/collections/vec_deque/drain.rs b/library/alloc/src/collections/vec_deque/drain.rs index aa5cc0fb1ce..7a413ddf971 100644 --- a/library/alloc/src/collections/vec_deque/drain.rs +++ b/library/alloc/src/collections/vec_deque/drain.rs @@ -109,7 +109,6 @@ impl Drop for Drain<'_, T, A> { let source_deque = unsafe { self.0.deque.as_mut() }; - let drain_start = source_deque.len(); let drain_len = self.0.drain_len; let new_len = self.0.new_len; @@ -119,40 +118,32 @@ impl Drop for Drain<'_, T, A> { return; } - let head_len = drain_start; + let head_len = source_deque.len(); let tail_len = new_len - head_len; - match (head_len, tail_len) { - (0, 0) => { - source_deque.head = 0; - source_deque.len = 0; + if head_len != 0 && tail_len != 0 { + let (src, dst, len); + if head_len < tail_len { + src = source_deque.head; + dst = source_deque.to_physical_idx(drain_len); + len = head_len; + } else { + src = source_deque.to_physical_idx(head_len + drain_len); + dst = source_deque.to_physical_idx(head_len); + len = tail_len; + }; + + unsafe { + source_deque.wrap_copy(src, dst, len); } - (0, _) => { - source_deque.head = source_deque.to_physical_idx(drain_len); - source_deque.len = new_len; - } - (_, 0) => { - source_deque.len = new_len; - } - _ => unsafe { - if head_len <= tail_len { - source_deque.wrap_copy( - source_deque.head, - source_deque.to_physical_idx(drain_len), - head_len, - ); - source_deque.head = source_deque.to_physical_idx(drain_len); - source_deque.len = new_len; - } else { - source_deque.wrap_copy( - source_deque.to_physical_idx(head_len + drain_len), - source_deque.to_physical_idx(head_len), - tail_len, - ); - source_deque.len = new_len; - } - }, } + + if new_len == 0 { + source_deque.head = 0; + } else if head_len < tail_len { + source_deque.head = source_deque.to_physical_idx(drain_len); + } + source_deque.len = new_len; } } From b6702939f72c6c55a74fc5f343d34c01e3d8521b Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Thu, 23 Nov 2023 23:31:19 +0100 Subject: [PATCH 4/6] outline large copies --- .../alloc/src/collections/vec_deque/drain.rs | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/library/alloc/src/collections/vec_deque/drain.rs b/library/alloc/src/collections/vec_deque/drain.rs index 7a413ddf971..bf8c9b2d47a 100644 --- a/library/alloc/src/collections/vec_deque/drain.rs +++ b/library/alloc/src/collections/vec_deque/drain.rs @@ -121,7 +121,27 @@ impl Drop for Drain<'_, T, A> { let head_len = source_deque.len(); let tail_len = new_len - head_len; + // When draining at the front (`.drain(..n)`) or at the back (`.drain(n..)`), + // we don't need to copy any data. + // Outlining this function helps LLVM to eliminate the copies in these cases. if head_len != 0 && tail_len != 0 { + copy_data(source_deque, drain_len, head_len, tail_len); + } + + if new_len == 0 { + source_deque.head = 0; + } else if head_len < tail_len { + source_deque.head = source_deque.to_physical_idx(drain_len); + } + source_deque.len = new_len; + + #[cold] + fn copy_data( + source_deque: &mut VecDeque, + drain_len: usize, + head_len: usize, + tail_len: usize, + ) { let (src, dst, len); if head_len < tail_len { src = source_deque.head; @@ -137,13 +157,6 @@ impl Drop for Drain<'_, T, A> { source_deque.wrap_copy(src, dst, len); } } - - if new_len == 0 { - source_deque.head = 0; - } else if head_len < tail_len { - source_deque.head = source_deque.to_physical_idx(drain_len); - } - source_deque.len = new_len; } } From 8f259ade66fa051b7918412a83f2fb8ef3ded92f Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Thu, 23 Nov 2023 23:11:22 +0100 Subject: [PATCH 5/6] add codegen test --- tests/codegen/vecdeque-drain.rs | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/codegen/vecdeque-drain.rs diff --git a/tests/codegen/vecdeque-drain.rs b/tests/codegen/vecdeque-drain.rs new file mode 100644 index 00000000000..f8263c69572 --- /dev/null +++ b/tests/codegen/vecdeque-drain.rs @@ -0,0 +1,69 @@ +// Check that draining at the front or back doesn't copy memory. + +// compile-flags: -O +// ignore-debug: the debug assertions get in the way + +#![crate_type = "lib"] + +use std::collections::VecDeque; + +// CHECK-LABEL: @clear +// CHECK-NOT: call +// CHECK-NOT: br +// CHECK: getelementptr inbounds +// CHECK-NEXT: {{call void @llvm.memset|store}} +// CHECK-NEXT: ret void +#[no_mangle] +pub fn clear(v: &mut VecDeque) { + v.drain(..); +} + +// CHECK-LABEL: @truncate +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK-NOT: br +// CHECK: ret void +#[no_mangle] +pub fn truncate(v: &mut VecDeque, n: usize) { + if n < v.len() { + v.drain(n..); + } +} + +// CHECK-LABEL: @advance +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK: br +// CHECK-NOT: call +// CHECK-NOT: br +// CHECK: ret void +#[no_mangle] +pub fn advance(v: &mut VecDeque, n: usize) { + if n < v.len() { + v.drain(..n); + } else { + v.clear(); + } +} + +// CHECK-LABEL: @remove +// CHECK: call +// CHECK: ret void +#[no_mangle] +pub fn remove(v: &mut VecDeque, a: usize, b: usize) { + v.drain(a..b); +} From 5d989770f285b6680a2540502fbfd0b4b21b16b3 Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Sun, 11 Feb 2024 17:13:23 +0100 Subject: [PATCH 6/6] address review comments --- .../alloc/src/collections/vec_deque/drain.rs | 143 ++++++++++++------ 1 file changed, 99 insertions(+), 44 deletions(-) diff --git a/library/alloc/src/collections/vec_deque/drain.rs b/library/alloc/src/collections/vec_deque/drain.rs index bf8c9b2d47a..1373e601492 100644 --- a/library/alloc/src/collections/vec_deque/drain.rs +++ b/library/alloc/src/collections/vec_deque/drain.rs @@ -95,6 +95,22 @@ impl Drop for Drain<'_, T, A> { fn drop(&mut self) { struct DropGuard<'r, 'a, T, A: Allocator>(&'r mut Drain<'a, T, A>); + let guard = DropGuard(self); + + if mem::needs_drop::() && guard.0.remaining != 0 { + unsafe { + // SAFETY: We just checked that `self.remaining != 0`. + let (front, back) = guard.0.as_slices(); + // since idx is a logical index, we don't need to worry about wrapping. + guard.0.idx += front.len(); + guard.0.remaining -= front.len(); + ptr::drop_in_place(front); + guard.0.remaining = 0; + ptr::drop_in_place(back); + } + } + + // Dropping `guard` handles moving the remaining elements into place. impl<'r, 'a, T, A: Allocator> Drop for DropGuard<'r, 'a, T, A> { #[inline] fn drop(&mut self) { @@ -118,63 +134,102 @@ impl Drop for Drain<'_, T, A> { return; } - let head_len = source_deque.len(); - let tail_len = new_len - head_len; + let head_len = source_deque.len; // #elements in front of the drain + let tail_len = new_len - head_len; // #elements behind the drain + + // Next, we will fill the hole left by the drain with as few writes as possible. + // The code below handles the following control flow and reduces the amount of + // branches under the assumption that `head_len == 0 || tail_len == 0`, i.e. + // draining at the front or at the back of the dequeue is especially common. + // + // H = "head index" = `deque.head` + // h = elements in front of the drain + // d = elements in the drain + // t = elements behind the drain + // + // Note that the buffer may wrap at any point and the wrapping is handled by + // `wrap_copy` and `to_physical_idx`. + // + // Case 1: if `head_len == 0 && tail_len == 0` + // Everything was drained, reset the head index back to 0. + // H + // [ . . . . . d d d d . . . . . ] + // H + // [ . . . . . . . . . . . . . . ] + // + // Case 2: else if `tail_len == 0` + // Don't move data or the head index. + // H + // [ . . . h h h h d d d d . . . ] + // H + // [ . . . h h h h . . . . . . . ] + // + // Case 3: else if `head_len == 0` + // Don't move data, but move the head index. + // H + // [ . . . d d d d t t t t . . . ] + // H + // [ . . . . . . . t t t t . . . ] + // + // Case 4: else if `tail_len <= head_len` + // Move data, but not the head index. + // H + // [ . . h h h h d d d d t t . . ] + // H + // [ . . h h h h t t . . . . . . ] + // + // Case 5: else + // Move data and the head index. + // H + // [ . . h h d d d d t t t t . . ] + // H + // [ . . . . . . h h t t t t . . ] // When draining at the front (`.drain(..n)`) or at the back (`.drain(n..)`), - // we don't need to copy any data. - // Outlining this function helps LLVM to eliminate the copies in these cases. + // we don't need to copy any data. The number of elements copied would be 0. if head_len != 0 && tail_len != 0 { - copy_data(source_deque, drain_len, head_len, tail_len); + join_head_and_tail_wrapping(source_deque, drain_len, head_len, tail_len); + // Marking this function as cold helps LLVM to eliminate it entirely if + // this branch is never taken. + // We use `#[cold]` instead of `#[inline(never)]`, because inlining this + // function into the general case (`.drain(n..m)`) is fine. + // See `tests/codegen/vecdeque-drain.rs` for a test. + #[cold] + fn join_head_and_tail_wrapping( + source_deque: &mut VecDeque, + drain_len: usize, + head_len: usize, + tail_len: usize, + ) { + // Pick whether to move the head or the tail here. + let (src, dst, len); + if head_len < tail_len { + src = source_deque.head; + dst = source_deque.to_physical_idx(drain_len); + len = head_len; + } else { + src = source_deque.to_physical_idx(head_len + drain_len); + dst = source_deque.to_physical_idx(head_len); + len = tail_len; + }; + + unsafe { + source_deque.wrap_copy(src, dst, len); + } + } } if new_len == 0 { + // Special case: If the entire dequeue was drained, reset the head back to 0, + // like `.clear()` does. source_deque.head = 0; } else if head_len < tail_len { + // If we moved the head above, then we need to adjust the head index here. source_deque.head = source_deque.to_physical_idx(drain_len); } source_deque.len = new_len; - - #[cold] - fn copy_data( - source_deque: &mut VecDeque, - drain_len: usize, - head_len: usize, - tail_len: usize, - ) { - let (src, dst, len); - if head_len < tail_len { - src = source_deque.head; - dst = source_deque.to_physical_idx(drain_len); - len = head_len; - } else { - src = source_deque.to_physical_idx(head_len + drain_len); - dst = source_deque.to_physical_idx(head_len); - len = tail_len; - }; - - unsafe { - source_deque.wrap_copy(src, dst, len); - } - } } } - - let guard = DropGuard(self); - if mem::needs_drop::() && guard.0.remaining != 0 { - unsafe { - // SAFETY: We just checked that `self.remaining != 0`. - let (front, back) = guard.0.as_slices(); - // since idx is a logical index, we don't need to worry about wrapping. - guard.0.idx += front.len(); - guard.0.remaining -= front.len(); - ptr::drop_in_place(front); - guard.0.remaining = 0; - ptr::drop_in_place(back); - } - } - - // Dropping `guard` handles moving the remaining elements into place. } }