Rollup merge of #114382 - scottmcm:compare-bytes-intrinsic, r=cjgillot

Add a new `compare_bytes` intrinsic instead of calling `memcmp` directly

As discussed in #113435, this lets the backends be the place that can have the "don't call the function if n == 0" logic, if it's needed for the target.  (I didn't actually *add* those checks, though, since as I understood it we didn't actually need them on known targets?)

Doing this also let me make it `const` (unstable), which I don't think `extern "C" fn memcmp` can be.

cc `@RalfJung` `@Amanieu`
This commit is contained in:
Matthias Krüger 2023-08-07 05:29:12 +02:00 committed by GitHub
commit cbe2522652
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 270 additions and 22 deletions

View file

@ -1155,6 +1155,20 @@ fn codegen_regular_intrinsic_call<'tcx>(
ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
}
sym::compare_bytes => {
intrinsic_args!(fx, args => (lhs_ptr, rhs_ptr, bytes_val); intrinsic);
let lhs_ptr = lhs_ptr.load_scalar(fx);
let rhs_ptr = rhs_ptr.load_scalar(fx);
let bytes_val = bytes_val.load_scalar(fx);
let params = vec![AbiParam::new(fx.pointer_type); 3];
let returns = vec![AbiParam::new(types::I32)];
let args = &[lhs_ptr, rhs_ptr, bytes_val];
// Here we assume that the `memcmp` provided by the target is a NOP for size 0.
let cmp = fx.lib_call("memcmp", params, returns, args)[0];
ret.write_cvalue(fx, CValue::by_val(cmp, ret.layout()));
}
sym::const_allocate => {
intrinsic_args!(fx, args => (_size, _align); intrinsic);

View file

@ -302,6 +302,21 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
}
}
sym::compare_bytes => {
let a = args[0].immediate();
let b = args[1].immediate();
let n = args[2].immediate();
let void_ptr_type = self.context.new_type::<*const ()>();
let a_ptr = self.bitcast(a, void_ptr_type);
let b_ptr = self.bitcast(b, void_ptr_type);
// Here we assume that the `memcmp` provided by the target is a NOP for size 0.
let builtin = self.context.get_builtin_function("memcmp");
let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
self.sext(cmp, self.type_ix(32))
}
sym::black_box => {
args[0].val.store(self, result);

View file

@ -891,7 +891,8 @@ impl<'ll> CodegenCx<'ll, '_> {
ifn!("llvm.prefetch", fn(ptr, t_i32, t_i32, t_i32) -> void);
// This isn't an "LLVM intrinsic", but LLVM's optimization passes
// recognize it like one and we assume it exists in `core::slice::cmp`
// recognize it like one (including turning it into `bcmp` sometimes)
// and we use it to implement intrinsics like `raw_eq` and `compare_bytes`
match self.sess().target.arch.as_ref() {
"avr" | "msp430" => ifn!("memcmp", fn(ptr, ptr, t_isize) -> t_i16),
_ => ifn!("memcmp", fn(ptr, ptr, t_isize) -> t_i32),

View file

@ -329,6 +329,16 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
}
}
sym::compare_bytes => {
// Here we assume that the `memcmp` provided by the target is a NOP for size 0.
let cmp = self.call_intrinsic(
"memcmp",
&[args[0].immediate(), args[1].immediate(), args[2].immediate()],
);
// Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
self.sext(cmp, self.type_ix(32))
}
sym::black_box => {
args[0].val.store(self, result);
let result_val_span = [result.llval];

View file

@ -261,6 +261,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
sym::write_bytes => {
self.write_bytes_intrinsic(&args[0], &args[1], &args[2])?;
}
sym::compare_bytes => {
let result = self.compare_bytes_intrinsic(&args[0], &args[1], &args[2])?;
self.write_scalar(result, dest)?;
}
sym::arith_offset => {
let ptr = self.read_pointer(&args[0])?;
let offset_count = self.read_target_isize(&args[1])?;
@ -643,6 +647,24 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
self.write_bytes_ptr(dst, bytes)
}
pub(crate) fn compare_bytes_intrinsic(
&mut self,
left: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
right: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
byte_count: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
) -> InterpResult<'tcx, Scalar<M::Provenance>> {
let left = self.read_pointer(left)?;
let right = self.read_pointer(right)?;
let n = Size::from_bytes(self.read_target_usize(byte_count)?);
let left_bytes = self.read_bytes_ptr_strip_provenance(left, n)?;
let right_bytes = self.read_bytes_ptr_strip_provenance(right, n)?;
// `Ordering`'s discriminants are -1/0/+1, so casting does the right thing.
let result = Ord::cmp(left_bytes, right_bytes) as i32;
Ok(Scalar::from_i32(result))
}
pub(crate) fn raw_eq_intrinsic(
&mut self,
lhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,

View file

@ -273,6 +273,10 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
],
Ty::new_unit(tcx),
),
sym::compare_bytes => {
let byte_ptr = Ty::new_imm_ptr(tcx, tcx.types.u8);
(0, vec![byte_ptr, byte_ptr, tcx.types.usize], tcx.types.i32)
}
sym::write_bytes | sym::volatile_set_memory => (
1,
vec![

View file

@ -498,6 +498,7 @@ symbols! {
cold,
collapse_debuginfo,
column,
compare_bytes,
compare_exchange,
compare_exchange_weak,
compile_error,

View file

@ -2385,6 +2385,25 @@ extern "rust-intrinsic" {
#[rustc_nounwind]
pub fn raw_eq<T>(a: &T, b: &T) -> bool;
/// Lexicographically compare `[left, left + bytes)` and `[right, right + bytes)`
/// as unsigned bytes, returning negative if `left` is less, zero if all the
/// bytes match, or positive if `right` is greater.
///
/// This underlies things like `<[u8]>::cmp`, and will usually lower to `memcmp`.
///
/// # Safety
///
/// `left` and `right` must each be [valid] for reads of `bytes` bytes.
///
/// Note that this applies to the whole range, not just until the first byte
/// that differs. That allows optimizations that can read in large chunks.
///
/// [valid]: crate::ptr#safety
#[cfg(not(bootstrap))]
#[rustc_const_unstable(feature = "const_intrinsic_compare_bytes", issue = "none")]
#[rustc_nounwind]
pub fn compare_bytes(left: *const u8, right: *const u8, bytes: usize) -> i32;
/// See documentation of [`std::hint::black_box`] for details.
///
/// [`std::hint::black_box`]: crate::hint::black_box
@ -2825,3 +2844,18 @@ pub const unsafe fn write_bytes<T>(dst: *mut T, val: u8, count: usize) {
write_bytes(dst, val, count)
}
}
/// Backfill for bootstrap
#[cfg(bootstrap)]
pub unsafe fn compare_bytes(left: *const u8, right: *const u8, bytes: usize) -> i32 {
extern "C" {
fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> crate::ffi::c_int;
}
if bytes != 0 {
// SAFETY: Since bytes is non-zero, the caller has met `memcmp`'s requirements.
unsafe { memcmp(left, right, bytes).into() }
} else {
0
}
}

View file

@ -1,22 +1,12 @@
//! Comparison traits for `[T]`.
use crate::cmp::{self, BytewiseEq, Ordering};
use crate::ffi;
use crate::intrinsics::compare_bytes;
use crate::mem;
use super::from_raw_parts;
use super::memchr;
extern "C" {
/// Calls implementation provided memcmp.
///
/// Interprets the data as u8.
///
/// Returns 0 for equal, < 0 for less than and > 0 for greater
/// than.
fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> ffi::c_int;
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B> PartialEq<[B]> for [A]
where
@ -74,7 +64,8 @@ where
}
}
// Use memcmp for bytewise equality when the types allow
// When each element can be compared byte-wise, we can compare all the bytes
// from the whole size in one call to the intrinsics.
impl<A, B> SlicePartialEq<B> for [A]
where
A: BytewiseEq<B>,
@ -88,7 +79,7 @@ where
// The two slices have been checked to have the same size above.
unsafe {
let size = mem::size_of_val(self);
memcmp(self.as_ptr() as *const u8, other.as_ptr() as *const u8, size) == 0
compare_bytes(self.as_ptr() as *const u8, other.as_ptr() as *const u8, size) == 0
}
}
}
@ -183,7 +174,7 @@ impl<A: Ord> SliceOrd for A {
}
}
// memcmp compares a sequence of unsigned bytes lexicographically.
// `compare_bytes` compares a sequence of unsigned bytes lexicographically.
// this matches the order we want for [u8], but no others (not even [i8]).
impl SliceOrd for u8 {
#[inline]
@ -195,7 +186,7 @@ impl SliceOrd for u8 {
// SAFETY: `left` and `right` are references and are thus guaranteed to be valid.
// We use the minimum of both lengths which guarantees that both regions are
// valid for reads in that interval.
let mut order = unsafe { memcmp(left.as_ptr(), right.as_ptr(), len) as isize };
let mut order = unsafe { compare_bytes(left.as_ptr(), right.as_ptr(), len) as isize };
if order == 0 {
order = diff;
}

View file

@ -1,8 +1,8 @@
error: Undefined Behavior: reading memory at ALLOC[0x0..0x10], but memory is uninitialized at [0x4..0x10], and this operation requires initialized memory
--> RUSTLIB/core/src/slice/cmp.rs:LL:CC
|
LL | let mut order = unsafe { memcmp(left.as_ptr(), right.as_ptr(), len) as isize };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading memory at ALLOC[0x0..0x10], but memory is uninitialized at [0x4..0x10], and this operation requires initialized memory
LL | let mut order = unsafe { compare_bytes(left.as_ptr(), right.as_ptr(), len) as isize };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading memory at ALLOC[0x0..0x10], but memory is uninitialized at [0x4..0x10], and this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information

View file

@ -1,8 +1,8 @@
error: Undefined Behavior: reading memory at ALLOC[0x0..0x8], but memory is uninitialized at [0x4..0x8], and this operation requires initialized memory
--> RUSTLIB/core/src/slice/cmp.rs:LL:CC
|
LL | let mut order = unsafe { memcmp(left.as_ptr(), right.as_ptr(), len) as isize };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading memory at ALLOC[0x0..0x8], but memory is uninitialized at [0x4..0x8], and this operation requires initialized memory
LL | let mut order = unsafe { compare_bytes(left.as_ptr(), right.as_ptr(), len) as isize };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading memory at ALLOC[0x0..0x8], but memory is uninitialized at [0x4..0x8], and this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information

View file

@ -0,0 +1,34 @@
// revisions: INT32 INT16
// compile-flags: -O
// [INT32] ignore-16bit
// [INT16] only-16bit
#![crate_type = "lib"]
#![feature(core_intrinsics)]
use std::intrinsics::compare_bytes;
#[no_mangle]
// CHECK-LABEL: @bytes_cmp(
pub unsafe fn bytes_cmp(a: *const u8, b: *const u8, n: usize) -> i32 {
// INT32: %[[TEMP:.+]] = tail call i32 @memcmp(ptr %a, ptr %b, {{i32|i64}} %n)
// INT32-NOT: sext
// INT32: ret i32 %[[TEMP]]
// INT16: %[[TEMP1:.+]] = tail call i16 @memcmp(ptr %a, ptr %b, i16 %n)
// INT16: %[[TEMP2:.+]] = sext i16 %[[TEMP1]] to i32
// INT16: ret i32 %[[TEMP2]]
compare_bytes(a, b, n)
}
// Ensure that, even though there's an `sext` emitted by the intrinsic,
// that doesn't end up pessiming checks against zero.
#[no_mangle]
// CHECK-LABEL: @bytes_eq(
pub unsafe fn bytes_eq(a: *const u8, b: *const u8, n: usize) -> bool {
// CHECK: call {{.+}} @{{bcmp|memcmp}}(ptr %a, ptr %b, {{i16|i32|i64}} %n)
// CHECK-NOT: sext
// INT32: icmp eq i32
// INT16: icmp eq i16
compare_bytes(a, b, n) == 0_i32
}

View file

@ -0,0 +1,41 @@
// check-fail
#![feature(core_intrinsics)]
#![feature(const_intrinsic_compare_bytes)]
use std::intrinsics::compare_bytes;
use std::mem::MaybeUninit;
fn main() {
const LHS_NULL: i32 = unsafe {
compare_bytes(0 as *const u8, 2 as *const u8, 0)
//~^ ERROR evaluation of constant value failed
};
const RHS_NULL: i32 = unsafe {
compare_bytes(1 as *const u8, 0 as *const u8, 0)
//~^ ERROR evaluation of constant value failed
};
const DANGLING_PTR_NON_ZERO_LENGTH: i32 = unsafe {
compare_bytes(1 as *const u8, 2 as *const u8, 1)
//~^ ERROR evaluation of constant value failed
};
const LHS_OUT_OF_BOUNDS: i32 = unsafe {
compare_bytes([1, 2, 3].as_ptr(), [1, 2, 3, 4].as_ptr(), 4)
//~^ ERROR evaluation of constant value failed
};
const RHS_OUT_OF_BOUNDS: i32 = unsafe {
compare_bytes([1, 2, 3, 4].as_ptr(), [1, 2, 3].as_ptr(), 4)
//~^ ERROR evaluation of constant value failed
};
const LHS_UNINIT: i32 = unsafe {
compare_bytes(MaybeUninit::uninit().as_ptr(), [1].as_ptr(), 1)
//~^ ERROR evaluation of constant value failed
};
const RHS_UNINIT: i32 = unsafe {
compare_bytes([1].as_ptr(), MaybeUninit::uninit().as_ptr(), 1)
//~^ ERROR evaluation of constant value failed
};
const WITH_PROVENANCE: i32 = unsafe {
compare_bytes([&1].as_ptr().cast(), [&2].as_ptr().cast(), std::mem::size_of::<usize>())
//~^ ERROR evaluation of constant value failed
};
}

View file

@ -0,0 +1,54 @@
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:10:9
|
LL | compare_bytes(0 as *const u8, 2 as *const u8, 0)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: null pointer is a dangling pointer (it has no provenance)
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:14:9
|
LL | compare_bytes(1 as *const u8, 0 as *const u8, 0)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: null pointer is a dangling pointer (it has no provenance)
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:18:9
|
LL | compare_bytes(1 as *const u8, 2 as *const u8, 1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: 0x1[noalloc] is a dangling pointer (it has no provenance)
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:22:9
|
LL | compare_bytes([1, 2, 3].as_ptr(), [1, 2, 3, 4].as_ptr(), 4)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: alloc6 has size 3, so pointer to 4 bytes starting at offset 0 is out-of-bounds
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:26:9
|
LL | compare_bytes([1, 2, 3, 4].as_ptr(), [1, 2, 3].as_ptr(), 4)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: alloc13 has size 3, so pointer to 4 bytes starting at offset 0 is out-of-bounds
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:30:9
|
LL | compare_bytes(MaybeUninit::uninit().as_ptr(), [1].as_ptr(), 1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading memory at alloc17[0x0..0x1], but memory is uninitialized at [0x0..0x1], and this operation requires initialized memory
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:34:9
|
LL | compare_bytes([1].as_ptr(), MaybeUninit::uninit().as_ptr(), 1)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading memory at alloc25[0x0..0x1], but memory is uninitialized at [0x0..0x1], and this operation requires initialized memory
error[E0080]: evaluation of constant value failed
--> $DIR/const-compare-bytes-ub.rs:38:9
|
LL | compare_bytes([&1].as_ptr().cast(), [&2].as_ptr().cast(), std::mem::size_of::<usize>())
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ unable to turn pointer into integer
|
= help: this code performed an operation that depends on the underlying bytes representing a pointer
= help: the absolute address of a pointer is not known at compile-time, so such operations are not supported
error: aborting due to 8 previous errors
For more information about this error, try `rustc --explain E0080`.

View file

@ -0,0 +1,27 @@
// run-pass
#![feature(core_intrinsics)]
#![feature(const_intrinsic_compare_bytes)]
use std::intrinsics::compare_bytes;
fn main() {
const A: i32 = unsafe {
compare_bytes(1 as *const u8, 2 as *const u8, 0)
};
assert_eq!(A, 0);
const B: i32 = unsafe {
compare_bytes([1, 2].as_ptr(), [1, 3].as_ptr(), 1)
};
assert_eq!(B, 0);
const C: i32 = unsafe {
compare_bytes([1, 2, 9].as_ptr(), [1, 3, 8].as_ptr(), 2)
};
assert!(C < 0);
const D: i32 = unsafe {
compare_bytes([1, 3, 8].as_ptr(), [1, 2, 9].as_ptr(), 2)
};
assert!(D > 0);
}

View file

@ -18,7 +18,7 @@ Respanned: TokenStream [Ident { ident: "$crate", span: $DIR/auxiliary/make-macro
use core /* 0#1 */::prelude /* 0#1 */::rust_2018 /* 0#1 */::*;
#[macro_use /* 0#1 */]
extern crate core /* 0#1 */;
extern crate compiler_builtins /* 444 */ as _ /* 0#1 */;
extern crate compiler_builtins /* 445 */ as _ /* 0#1 */;
// Don't load unnecessary hygiene information from std
extern crate std /* 0#0 */;

View file

@ -39,7 +39,7 @@ PRINT-BANG INPUT (DEBUG): TokenStream [
use ::core /* 0#1 */::prelude /* 0#1 */::rust_2015 /* 0#1 */::*;
#[macro_use /* 0#1 */]
extern crate core /* 0#2 */;
extern crate compiler_builtins /* 444 */ as _ /* 0#2 */;
extern crate compiler_builtins /* 445 */ as _ /* 0#2 */;
// Don't load unnecessary hygiene information from std
extern crate std /* 0#0 */;