Codegen fewer instructions in mem::replace

This commit is contained in:
Scott McMurray 2023-04-29 23:32:40 -07:00
parent ca3f742ff6
commit 5292d48b85
13 changed files with 154 additions and 78 deletions

View file

@ -381,6 +381,7 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
sym::unlikely => (0, vec![tcx.types.bool], tcx.types.bool),
sym::read_via_copy => (1, vec![tcx.mk_imm_ptr(param(0))], param(0)),
sym::write_via_move => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()),
sym::discriminant_value => {
let assoc_items = tcx.associated_item_def_ids(

View file

@ -179,6 +179,29 @@ impl<'tcx> MirPass<'tcx> for LowerIntrinsics {
}
}
}
sym::write_via_move => {
let target = target.unwrap();
let Ok([ptr, val]) = <[_; 2]>::try_from(std::mem::take(args)) else {
span_bug!(
terminator.source_info.span,
"Wrong number of arguments for write_via_move intrinsic",
);
};
let derefed_place =
if let Some(place) = ptr.place() && let Some(local) = place.as_local() {
tcx.mk_place_deref(local.into())
} else {
span_bug!(terminator.source_info.span, "Only passing a local is supported");
};
block.statements.push(Statement {
source_info: terminator.source_info,
kind: StatementKind::Assign(Box::new((
derefed_place,
Rvalue::Use(val),
))),
});
terminator.kind = TerminatorKind::Goto { target };
}
sym::discriminant_value => {
if let (Some(target), Some(arg)) = (*target, args[0].place()) {
let arg = tcx.mk_place_deref(arg);

View file

@ -1635,6 +1635,7 @@ symbols! {
write_bytes,
write_macro,
write_str,
write_via_move,
writeln_macro,
x87_reg,
xer,

View file

@ -2257,12 +2257,23 @@ extern "rust-intrinsic" {
/// This is an implementation detail of [`crate::ptr::read`] and should
/// not be used anywhere else. See its comments for why this exists.
///
/// This intrinsic can *only* be called where the argument is a local without
/// projections (`read_via_copy(p)`, not `read_via_copy(*p)`) so that it
/// This intrinsic can *only* be called where the pointer is a local without
/// projections (`read_via_copy(ptr)`, not `read_via_copy(*ptr)`) so that it
/// trivially obeys runtime-MIR rules about derefs in operands.
#[rustc_const_unstable(feature = "const_ptr_read", issue = "80377")]
#[rustc_nounwind]
pub fn read_via_copy<T>(p: *const T) -> T;
pub fn read_via_copy<T>(ptr: *const T) -> T;
/// This is an implementation detail of [`crate::ptr::write`] and should
/// not be used anywhere else. See its comments for why this exists.
///
/// This intrinsic can *only* be called where the pointer is a local without
/// projections (`write_via_move(ptr, x)`, not `write_via_move(*ptr, x)`) so
/// that it trivially obeys runtime-MIR rules about derefs in operands.
#[cfg(not(bootstrap))]
#[rustc_const_unstable(feature = "const_ptr_write", issue = "86302")]
#[rustc_nounwind]
pub fn write_via_move<T>(ptr: *mut T, value: T);
/// Returns the value of the discriminant for the variant in 'v';
/// if `T` has no discriminant, returns `0`.
@ -2828,3 +2839,16 @@ pub const unsafe fn transmute_unchecked<Src, Dst>(src: Src) -> Dst {
// SAFETY: It's a transmute -- the caller promised it's fine.
unsafe { transmute_copy(&ManuallyDrop::new(src)) }
}
/// Polyfill for bootstrap
#[cfg(bootstrap)]
pub const unsafe fn write_via_move<T>(ptr: *mut T, value: T) {
use crate::mem::*;
// SAFETY: the caller must guarantee that `dst` is valid for writes.
// `dst` cannot overlap `src` because the caller has mutable access
// to `dst` while `src` is owned by this function.
unsafe {
copy_nonoverlapping::<T>(&value, ptr, 1);
forget(value);
}
}

View file

@ -1349,13 +1349,13 @@ pub const unsafe fn read_unaligned<T>(src: *const T) -> T {
#[rustc_const_unstable(feature = "const_ptr_write", issue = "86302")]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub const unsafe fn write<T>(dst: *mut T, src: T) {
// We are calling the intrinsics directly to avoid function calls in the generated code
// as `intrinsics::copy_nonoverlapping` is a wrapper function.
extern "rust-intrinsic" {
#[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")]
#[rustc_nounwind]
fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
}
// Semantically, it would be fine for this to be implemented as a
// `copy_nonoverlapping` and appropriate drop suppression of `src`.
// However, implementing via that currently produces more MIR than is ideal.
// Using an intrinsic keeps it down to just the simple `*dst = move src` in
// MIR (11 statements shorter, at the time of writing), and also allows
// `src` to stay an SSA value in codegen_ssa, rather than a memory one.
// SAFETY: the caller must guarantee that `dst` is valid for writes.
// `dst` cannot overlap `src` because the caller has mutable access
@ -1365,8 +1365,7 @@ pub const unsafe fn write<T>(dst: *mut T, src: T) {
"ptr::write requires that the pointer argument is aligned and non-null",
[T](dst: *mut T) => is_aligned_and_not_null(dst)
);
copy_nonoverlapping(&src as *const T, dst, 1);
intrinsics::forget(src);
intrinsics::write_via_move(dst, src)
}
}

View file

@ -11,7 +11,9 @@
#[repr(C, align(8))]
pub struct Big([u64; 7]);
pub fn replace_big(dst: &mut Big, src: Big) -> Big {
// Before the `read_via_copy` intrinsic, this emitted six `memcpy`s.
// Back in 1.68, this emitted six `memcpy`s.
// `read_via_copy` in 1.69 got that down to three.
// `write_via_move` has it down to just the two essential ones.
std::mem::replace(dst, src)
}
@ -20,17 +22,13 @@ pub fn replace_big(dst: &mut Big, src: Big) -> Big {
// CHECK-NOT: call void @llvm.memcpy
// For a large type, we expect exactly three `memcpy`s
// For a large type, we expect exactly two `memcpy`s
// CHECK-LABEL: define internal void @{{.+}}mem{{.+}}replace{{.+}}sret(%Big)
// CHECK-NOT: alloca
// CHECK: alloca %Big
// CHECK-NOT: alloca
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %{{.*}}, {{i8\*|ptr}} align 8 %{{.*}}, i{{.*}} 56, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %0, {{i8\*|ptr}} align 8 %dest, i{{.*}} 56, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %{{.*}}, {{i8\*|ptr}} align 8 %{{.*}}, i{{.*}} 56, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %{{.*}}, {{i8\*|ptr}} align 8 %{{.*}}, i{{.*}} 56, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 8 %dest, {{i8\*|ptr}} align 8 %src, i{{.*}} 56, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK-NOT: call void @llvm.memcpy

View file

@ -1,33 +0,0 @@
// This test ensures that `mem::replace::<T>` only ever calls `@llvm.memcpy`
// with `size_of::<T>()` as the size, and never goes through any wrapper that
// may e.g. multiply `size_of::<T>()` with a variable "count" (which is only
// known to be `1` after inlining).
// compile-flags: -C no-prepopulate-passes -Zinline-mir=no
// ignore-debug: the debug assertions get in the way
#![crate_type = "lib"]
pub fn replace_byte(dst: &mut u8, src: u8) -> u8 {
std::mem::replace(dst, src)
}
// NOTE(eddyb) the `CHECK-NOT`s ensure that the only calls of `@llvm.memcpy` in
// the entire output, are the direct calls we want, from `ptr::replace`.
// CHECK-NOT: call void @llvm.memcpy
// For a small type, we expect one each of `load`/`store`/`memcpy` instead
// CHECK-LABEL: define internal noundef i8 @{{.+}}mem{{.+}}replace
// CHECK-NOT: alloca
// CHECK: alloca i8
// CHECK-NOT: alloca
// CHECK-NOT: call void @llvm.memcpy
// CHECK: load i8
// CHECK-NOT: call void @llvm.memcpy
// CHECK: store i8
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %{{.*}}, i{{.*}} 1, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK-NOT: call void @llvm.memcpy

View file

@ -0,0 +1,34 @@
// compile-flags: -O -C no-prepopulate-passes
// min-llvm-version: 15.0 (for opaque pointers)
// only-x86_64 (to not worry about usize differing)
// ignore-debug (the debug assertions get in the way)
#![crate_type = "lib"]
#[no_mangle]
// CHECK-LABEL: @replace_usize(
pub fn replace_usize(r: &mut usize, v: usize) -> usize {
// CHECK-NOT: alloca
// CHECK: %[[R:.+]] = load i64, ptr %r
// CHECK: store i64 %v, ptr %r
// CHECK: ret i64 %[[R]]
std::mem::replace(r, v)
}
#[no_mangle]
// CHECK-LABEL: @replace_ref_str(
pub fn replace_ref_str<'a>(r: &mut &'a str, v: &'a str) -> &'a str {
// CHECK-NOT: alloca
// CHECK: %[[A:.+]] = load ptr
// CHECK: %[[B:.+]] = load i64
// CHECK-NOT: store
// CHECK-NOT: load
// CHECK: store ptr
// CHECK: store i64
// CHECK-NOT: load
// CHECK-NOT: store
// CHECK: %[[P1:.+]] = insertvalue { ptr, i64 } poison, ptr %[[A]], 0
// CHECK: %[[P2:.+]] = insertvalue { ptr, i64 } %[[P1]], i64 %[[B]], 1
// CHECK: ret { ptr, i64 } %[[P2]]
std::mem::replace(r, v)
}

View file

@ -24,7 +24,7 @@
_4 = &raw const (*_1); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56
- _3 = option_payload_ptr::<usize>(move _4) -> [return: bb1, unwind unreachable]; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
- // mir::Constant
- // + span: $DIR/lower_intrinsics.rs:132:18: 132:54
- // + span: $DIR/lower_intrinsics.rs:137:18: 137:54
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option<usize>) -> *const usize {option_payload_ptr::<usize>}, val: Value(<ZST>) }
+ _3 = &raw const (((*_4) as Some).0: usize); // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
+ goto -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
@ -37,7 +37,7 @@
_6 = &raw const (*_2); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56
- _5 = option_payload_ptr::<String>(move _6) -> [return: bb2, unwind unreachable]; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
- // mir::Constant
- // + span: $DIR/lower_intrinsics.rs:133:18: 133:54
- // + span: $DIR/lower_intrinsics.rs:138:18: 138:54
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option<String>) -> *const String {option_payload_ptr::<String>}, val: Value(<ZST>) }
+ _5 = &raw const (((*_6) as Some).0: std::string::String); // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
+ goto -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57

View file

@ -15,7 +15,7 @@
_4 = _2; // scope 0 at $DIR/lower_intrinsics.rs:+1:33: +1:34
- _0 = offset::<*const i32, isize>(move _3, move _4) -> [return: bb1, unwind unreachable]; // scope 0 at $DIR/lower_intrinsics.rs:+1:5: +1:35
- // mir::Constant
- // + span: $DIR/lower_intrinsics.rs:139:5: 139:29
- // + span: $DIR/lower_intrinsics.rs:144:5: 144:29
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const i32, isize) -> *const i32 {offset::<*const i32, isize>}, val: Value(<ZST>) }
+ _0 = Offset(move _3, move _4); // scope 0 at $DIR/lower_intrinsics.rs:+1:5: +1:35
+ goto -> bb1; // scope 0 at $DIR/lower_intrinsics.rs:+1:5: +1:35

View file

@ -124,6 +124,11 @@ pub fn read_via_copy_uninhabited(r: &Never) -> Never {
unsafe { core::intrinsics::read_via_copy(r) }
}
// EMIT_MIR lower_intrinsics.write_via_move_string.LowerIntrinsics.diff
pub fn write_via_move_string(r: &mut String, v: String) {
unsafe { core::intrinsics::write_via_move(r, v) }
}
pub enum Never {}
// EMIT_MIR lower_intrinsics.option_payload.LowerIntrinsics.diff

View file

@ -0,0 +1,36 @@
- // MIR for `write_via_move_string` before LowerIntrinsics
+ // MIR for `write_via_move_string` after LowerIntrinsics
fn write_via_move_string(_1: &mut String, _2: String) -> () {
debug r => _1; // in scope 0 at $DIR/lower_intrinsics.rs:+0:30: +0:31
debug v => _2; // in scope 0 at $DIR/lower_intrinsics.rs:+0:46: +0:47
let mut _0: (); // return place in scope 0 at $DIR/lower_intrinsics.rs:+0:57: +0:57
let mut _3: *mut std::string::String; // in scope 0 at $DIR/lower_intrinsics.rs:+1:47: +1:48
let mut _4: std::string::String; // in scope 0 at $DIR/lower_intrinsics.rs:+1:50: +1:51
scope 1 {
}
bb0: {
StorageLive(_3); // scope 1 at $DIR/lower_intrinsics.rs:+1:47: +1:48
_3 = &raw mut (*_1); // scope 1 at $DIR/lower_intrinsics.rs:+1:47: +1:48
StorageLive(_4); // scope 1 at $DIR/lower_intrinsics.rs:+1:50: +1:51
_4 = move _2; // scope 1 at $DIR/lower_intrinsics.rs:+1:50: +1:51
- _0 = write_via_move::<String>(move _3, move _4) -> [return: bb1, unwind unreachable]; // scope 1 at $DIR/lower_intrinsics.rs:+1:14: +1:52
- // mir::Constant
- // + span: $DIR/lower_intrinsics.rs:129:14: 129:46
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*mut String, String) {write_via_move::<String>}, val: Value(<ZST>) }
+ (*_3) = move _4; // scope 1 at $DIR/lower_intrinsics.rs:+1:14: +1:52
+ goto -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+1:14: +1:52
}
bb1: {
StorageDead(_4); // scope 1 at $DIR/lower_intrinsics.rs:+1:51: +1:52
StorageDead(_3); // scope 1 at $DIR/lower_intrinsics.rs:+1:51: +1:52
goto -> bb2; // scope 0 at $DIR/lower_intrinsics.rs:+2:1: +2:2
}
bb2: {
return; // scope 0 at $DIR/lower_intrinsics.rs:+2:2: +2:2
}
}

View file

@ -9,29 +9,26 @@ fn mem_replace(_1: &mut u32, _2: u32) -> u32 {
debug src => _2; // in scope 1 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
let mut _3: *const u32; // in scope 1 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
let mut _4: *mut u32; // in scope 1 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
let mut _5: u32; // in scope 1 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
scope 2 {
scope 3 {
debug result => _0; // in scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
scope 7 (inlined std::ptr::write::<u32>) { // at $SRC_DIR/core/src/mem/mod.rs:LL:COL
debug dst => _4; // in scope 7 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
debug src => _5; // in scope 7 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
let mut _7: *const u32; // in scope 7 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
let _8: &u32; // in scope 7 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
let mut _9: *mut u32; // in scope 7 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
debug src => _2; // in scope 7 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
let mut _6: *mut u32; // in scope 7 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
scope 8 {
scope 9 (inlined std::ptr::write::runtime::<u32>) { // at $SRC_DIR/core/src/intrinsics.rs:LL:COL
debug dst => _9; // in scope 9 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
debug dst => _6; // in scope 9 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
}
}
}
}
scope 4 (inlined std::ptr::read::<u32>) { // at $SRC_DIR/core/src/mem/mod.rs:LL:COL
debug src => _3; // in scope 4 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
let mut _6: *const u32; // in scope 4 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
let mut _5: *const u32; // in scope 4 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
scope 5 {
scope 6 (inlined std::ptr::read::runtime::<u32>) { // at $SRC_DIR/core/src/intrinsics.rs:LL:COL
debug src => _6; // in scope 6 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
debug src => _5; // in scope 6 at $SRC_DIR/core/src/intrinsics.rs:LL:COL
}
}
}
@ -41,24 +38,15 @@ fn mem_replace(_1: &mut u32, _2: u32) -> u32 {
bb0: {
StorageLive(_3); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
_3 = &raw const (*_1); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_6); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_5); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
_0 = (*_3); // scope 5 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
StorageDead(_6); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageDead(_5); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageDead(_3); // scope 2 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_4); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
_4 = &raw mut (*_1); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_5); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
_5 = _2; // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_9); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_7); // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
StorageLive(_8); // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
_8 = &_5; // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
_7 = &raw const (*_8); // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
copy_nonoverlapping(dst = _4, src = move _7, count = const 1_usize); // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
StorageDead(_7); // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
StorageDead(_8); // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
StorageDead(_9); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageDead(_5); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageLive(_6); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
(*_4) = _2; // scope 8 at $SRC_DIR/core/src/ptr/mod.rs:LL:COL
StorageDead(_6); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
StorageDead(_4); // scope 3 at $SRC_DIR/core/src/mem/mod.rs:LL:COL
return; // scope 0 at $DIR/mem_replace.rs:+2:2: +2:2
}