Rollup merge of #133395 - calebzulawski:simd_relaxed_fma, r=workingjubilee
Add simd_relaxed_fma intrinsic. Adds compiler support for https://github.com/rust-lang/portable-simd/issues/387#issuecomment-2337169786 (r? `@workingjubilee`; cc `@RalfJung`: is this kind of nondeterminism a problem for miri/opsem?)
This commit is contained in:
commit
9709334061
7 changed files with 27 additions and 2 deletions
|
@ -415,7 +415,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
sym::simd_fma => {
|
// FIXME: simd_relaxed_fma doesn't relax to non-fused multiply-add
|
||||||
|
sym::simd_fma | sym::simd_relaxed_fma => {
|
||||||
intrinsic_args!(fx, args => (a, b, c); intrinsic);
|
intrinsic_args!(fx, args => (a, b, c); intrinsic);
|
||||||
|
|
||||||
if !a.layout().ty.is_simd() {
|
if !a.layout().ty.is_simd() {
|
||||||
|
|
|
@ -772,6 +772,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
||||||
sym::simd_flog => "log",
|
sym::simd_flog => "log",
|
||||||
sym::simd_floor => "floor",
|
sym::simd_floor => "floor",
|
||||||
sym::simd_fma => "fma",
|
sym::simd_fma => "fma",
|
||||||
|
sym::simd_relaxed_fma => "fma", // FIXME: this should relax to non-fused multiply-add when necessary
|
||||||
sym::simd_fpowi => "__builtin_powi",
|
sym::simd_fpowi => "__builtin_powi",
|
||||||
sym::simd_fpow => "pow",
|
sym::simd_fpow => "pow",
|
||||||
sym::simd_fsin => "sin",
|
sym::simd_fsin => "sin",
|
||||||
|
|
|
@ -1534,6 +1534,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
|
||||||
sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
|
sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
|
||||||
sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
|
sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
|
||||||
sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
|
sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
|
||||||
|
sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
|
||||||
sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
|
sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
|
||||||
sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
|
sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
|
||||||
sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
|
sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
|
||||||
|
@ -1572,6 +1573,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
|
||||||
| sym::simd_fpowi
|
| sym::simd_fpowi
|
||||||
| sym::simd_fsin
|
| sym::simd_fsin
|
||||||
| sym::simd_fsqrt
|
| sym::simd_fsqrt
|
||||||
|
| sym::simd_relaxed_fma
|
||||||
| sym::simd_round
|
| sym::simd_round
|
||||||
| sym::simd_trunc
|
| sym::simd_trunc
|
||||||
) {
|
) {
|
||||||
|
|
|
@ -641,7 +641,9 @@ pub fn check_intrinsic_type(
|
||||||
| sym::simd_round
|
| sym::simd_round
|
||||||
| sym::simd_trunc => (1, 0, vec![param(0)], param(0)),
|
| sym::simd_trunc => (1, 0, vec![param(0)], param(0)),
|
||||||
sym::simd_fpowi => (1, 0, vec![param(0), tcx.types.i32], param(0)),
|
sym::simd_fpowi => (1, 0, vec![param(0), tcx.types.i32], param(0)),
|
||||||
sym::simd_fma => (1, 0, vec![param(0), param(0), param(0)], param(0)),
|
sym::simd_fma | sym::simd_relaxed_fma => {
|
||||||
|
(1, 0, vec![param(0), param(0), param(0)], param(0))
|
||||||
|
}
|
||||||
sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)),
|
sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)),
|
||||||
sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)),
|
sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)),
|
||||||
sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
|
sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
|
||||||
|
|
|
@ -1844,6 +1844,7 @@ symbols! {
|
||||||
simd_reduce_mul_unordered,
|
simd_reduce_mul_unordered,
|
||||||
simd_reduce_or,
|
simd_reduce_or,
|
||||||
simd_reduce_xor,
|
simd_reduce_xor,
|
||||||
|
simd_relaxed_fma,
|
||||||
simd_rem,
|
simd_rem,
|
||||||
simd_round,
|
simd_round,
|
||||||
simd_saturating_add,
|
simd_saturating_add,
|
||||||
|
|
|
@ -612,6 +612,20 @@ extern "rust-intrinsic" {
|
||||||
#[rustc_nounwind]
|
#[rustc_nounwind]
|
||||||
pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
||||||
|
|
||||||
|
/// Computes `(x*y) + z` for each element, non-deterministically executing either
|
||||||
|
/// a fused multiply-add or two operations with rounding of the intermediate result.
|
||||||
|
///
|
||||||
|
/// The operation is fused if the code generator determines that the target instruction
|
||||||
|
/// set has support for a fused operation, and that the fused operation is more efficient
|
||||||
|
/// than the equivalent, separate pair of mul and add instructions. It is unspecified
|
||||||
|
/// whether or not a fused operation is selected, and that may depend on optimization
|
||||||
|
/// level and context, for example.
|
||||||
|
///
|
||||||
|
/// `T` must be a vector of floats.
|
||||||
|
#[cfg(not(bootstrap))]
|
||||||
|
#[rustc_nounwind]
|
||||||
|
pub fn simd_relaxed_fma<T>(x: T, y: T, z: T) -> T;
|
||||||
|
|
||||||
/// Computes the sine of each element.
|
/// Computes the sine of each element.
|
||||||
///
|
///
|
||||||
/// `T` must be a vector of floats.
|
/// `T` must be a vector of floats.
|
||||||
|
|
|
@ -23,6 +23,7 @@ extern "rust-intrinsic" {
|
||||||
fn simd_fexp<T>(x: T) -> T;
|
fn simd_fexp<T>(x: T) -> T;
|
||||||
fn simd_fexp2<T>(x: T) -> T;
|
fn simd_fexp2<T>(x: T) -> T;
|
||||||
fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
||||||
|
fn simd_relaxed_fma<T>(x: T, y: T, z: T) -> T;
|
||||||
fn simd_flog<T>(x: T) -> T;
|
fn simd_flog<T>(x: T) -> T;
|
||||||
fn simd_flog10<T>(x: T) -> T;
|
fn simd_flog10<T>(x: T) -> T;
|
||||||
fn simd_flog2<T>(x: T) -> T;
|
fn simd_flog2<T>(x: T) -> T;
|
||||||
|
@ -77,6 +78,9 @@ fn main() {
|
||||||
let r = simd_fma(x, h, h);
|
let r = simd_fma(x, h, h);
|
||||||
assert_approx_eq!(x, r);
|
assert_approx_eq!(x, r);
|
||||||
|
|
||||||
|
let r = simd_relaxed_fma(x, h, h);
|
||||||
|
assert_approx_eq!(x, r);
|
||||||
|
|
||||||
let r = simd_fsqrt(x);
|
let r = simd_fsqrt(x);
|
||||||
assert_approx_eq!(x, r);
|
assert_approx_eq!(x, r);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue