Rollup merge of #128149 - RalfJung:nontemporal_store, r=jieyouxu,Amanieu,Jubilee
nontemporal_store: make sure that the intrinsic is truly just a hint

The `!nontemporal` flag for stores in LLVM *sounds* like it is just a hint, but actually, it is not -- at least on x86, non-temporal stores need very special treatment by the programmer or else the Rust memory model breaks down. LLVM still treats these stores as if they were normal stores for optimizations, which is [highly dubious](https://github.com/llvm/llvm-project/issues/64521).

Let's avoid all that dubiousness by making our own non-temporal stores be truly just a hint, which is possible on some targets (e.g. ARM). On all other targets, non-temporal stores become regular stores.

~~Blocked on https://github.com/rust-lang/stdarch/pull/1541 propagating to the rustc repo, to make sure the `_mm_stream` intrinsics are unaffected by this change.~~

Fixes https://github.com/rust-lang/rust/issues/114582

Cc `@Amanieu` `@workingjubilee`
This commit is contained in:
commit
095ca33bb6
5 changed files with 62 additions and 16 deletions
|
@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>(
|
||||||
|
|
||||||
// Cranelift treats stores as volatile by default
|
// Cranelift treats stores as volatile by default
|
||||||
// FIXME correctly handle unaligned_volatile_store
|
// FIXME correctly handle unaligned_volatile_store
|
||||||
// FIXME actually do nontemporal stores if requested
|
// FIXME actually do nontemporal stores if requested (but do not just emit MOVNT on x86;
|
||||||
|
// see the LLVM backend for details)
|
||||||
let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
|
let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
|
||||||
dest.write_cvalue(fx, val);
|
dest.write_cvalue(fx, val);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1127,6 +1127,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
|
||||||
self.llbb().add_assignment(self.location, aligned_destination, val);
|
self.llbb().add_assignment(self.location, aligned_destination, val);
|
||||||
// TODO(antoyo): handle align and flags.
|
// TODO(antoyo): handle align and flags.
|
||||||
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
|
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
|
||||||
|
// When adding support for NONTEMPORAL, make sure to not just emit MOVNT on x86; see the
|
||||||
|
// LLVM backend for details.
|
||||||
self.cx.context.new_rvalue_zero(self.type_i32())
|
self.cx.context.new_rvalue_zero(self.type_i32())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -728,13 +728,32 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
|
||||||
llvm::LLVMSetVolatile(store, llvm::True);
|
llvm::LLVMSetVolatile(store, llvm::True);
|
||||||
}
|
}
|
||||||
if flags.contains(MemFlags::NONTEMPORAL) {
|
if flags.contains(MemFlags::NONTEMPORAL) {
|
||||||
// According to LLVM [1] building a nontemporal store must
|
// Make sure that the current target architecture supports "sane" non-temporal
|
||||||
// *always* point to a metadata value of the integer 1.
|
// stores, i.e., non-temporal stores that are equivalent to regular stores except
|
||||||
//
|
// for performance. LLVM doesn't seem to care about this, and will happily treat
|
||||||
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
|
// `!nontemporal` stores as-if they were normal stores (for reordering optimizations
|
||||||
let one = self.cx.const_i32(1);
|
// etc) even on x86, despite later lowering them to MOVNT, which does *not* behave like
|
||||||
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
|
// regular stores but require special fences.
|
||||||
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
|
// So we keep a list of architectures where `!nontemporal` is known to be truly just
|
||||||
|
// a hint, and use regular stores everywhere else.
|
||||||
|
// (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
|
||||||
|
// before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
|
||||||
|
// For more context, see <https://github.com/rust-lang/rust/issues/114582> and
|
||||||
|
// <https://github.com/llvm/llvm-project/issues/64521>.
|
||||||
|
const WELL_BEHAVED_NONTEMPORAL_ARCHS: &[&str] =
|
||||||
|
&["aarch64", "arm", "riscv32", "riscv64"];
|
||||||
|
|
||||||
|
let use_nontemporal =
|
||||||
|
WELL_BEHAVED_NONTEMPORAL_ARCHS.contains(&&*self.cx.tcx.sess.target.arch);
|
||||||
|
if use_nontemporal {
|
||||||
|
// According to LLVM [1] building a nontemporal store must
|
||||||
|
// *always* point to a metadata value of the integer 1.
|
||||||
|
//
|
||||||
|
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
|
||||||
|
let one = self.cx.const_i32(1);
|
||||||
|
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
|
||||||
|
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
store
|
store
|
||||||
}
|
}
|
||||||
|
|
|
@ -2675,12 +2675,12 @@ extern "rust-intrinsic" {
|
||||||
#[rustc_nounwind]
|
#[rustc_nounwind]
|
||||||
pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32;
|
pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32;
|
||||||
|
|
||||||
/// Emits a `!nontemporal` store according to LLVM (see their docs).
|
/// Emits a `nontemporal` store, which gives a hint to the CPU that the data should not be held
|
||||||
/// Probably will never become stable.
|
/// in cache. Except for performance, this is fully equivalent to `ptr.write(val)`.
|
||||||
///
|
///
|
||||||
/// Do NOT use this intrinsic; "nontemporal" operations do not exist in our memory model!
|
/// Not all architectures provide such an operation. For instance, x86 does not: while `MOVNT`
|
||||||
/// It exists to support current stdarch, but the plan is to change stdarch and remove this intrinsic.
|
/// exists, that operation is *not* equivalent to `ptr.write(val)` (`MOVNT` writes can be reordered
|
||||||
/// See <https://github.com/rust-lang/rust/issues/114582> for some more discussion.
|
/// in ways that are not allowed for regular writes).
|
||||||
#[rustc_nounwind]
|
#[rustc_nounwind]
|
||||||
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
|
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,37 @@
|
||||||
//@ compile-flags: -O
|
//@ compile-flags: -O
|
||||||
|
//@revisions: with_nontemporal without_nontemporal
|
||||||
|
//@[with_nontemporal] compile-flags: --target aarch64-unknown-linux-gnu
|
||||||
|
//@[with_nontemporal] needs-llvm-components: aarch64
|
||||||
|
//@[without_nontemporal] compile-flags: --target x86_64-unknown-linux-gnu
|
||||||
|
//@[without_nontemporal] needs-llvm-components: x86
|
||||||
|
|
||||||
#![feature(core_intrinsics)]
|
// Ensure that we *do* emit the `!nontemporal` flag on architectures where it
|
||||||
|
// is well-behaved, but do *not* emit it on architectures where it is ill-behaved.
|
||||||
|
// For more context, see <https://github.com/rust-lang/rust/issues/114582> and
|
||||||
|
// <https://github.com/llvm/llvm-project/issues/64521>.
|
||||||
|
|
||||||
|
#![feature(no_core, lang_items, intrinsics)]
|
||||||
|
#![no_core]
|
||||||
#![crate_type = "lib"]
|
#![crate_type = "lib"]
|
||||||
|
|
||||||
|
#[lang = "sized"]
|
||||||
|
pub trait Sized {}
|
||||||
|
#[lang = "copy"]
|
||||||
|
pub trait Copy {}
|
||||||
|
|
||||||
|
impl Copy for u32 {}
|
||||||
|
impl<T> Copy for *mut T {}
|
||||||
|
|
||||||
|
extern "rust-intrinsic" {
|
||||||
|
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
|
||||||
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub fn a(a: &mut u32, b: u32) {
|
pub fn a(a: &mut u32, b: u32) {
|
||||||
// CHECK-LABEL: define{{.*}}void @a
|
// CHECK-LABEL: define{{.*}}void @a
|
||||||
// CHECK: store i32 %b, ptr %a, align 4, !nontemporal
|
// with_nontemporal: store i32 %b, ptr %a, align 4, !nontemporal
|
||||||
|
// without_nontemporal-NOT: nontemporal
|
||||||
unsafe {
|
unsafe {
|
||||||
std::intrinsics::nontemporal_store(a, b);
|
nontemporal_store(a, b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue