Add intrinsics for float arithmetic with the `fast` flag enabled
`fast`, a.k.a. UnsafeAlgebra, is the flag that enables all "unsafe" (according to LLVM) float optimizations. See the LangRef for more information: http://llvm.org/docs/LangRef.html#fast-math-flags

Providing these operations with less precise associativity rules (for example) is useful to numerical applications. For example, consider the summation loop:

    let mut sum = 0.;
    for element in data {
        sum += *element;
    }

Using the default floating point semantics, this loop expresses that the floats must be added in sequence, one after another. This constraint is usually completely unintended, and it means that no autovectorization is possible.
This commit is contained in:
parent
235d77457d
commit
2dbac1fb8e
9 changed files with 261 additions and 0 deletions
|
@ -539,6 +539,32 @@ extern "rust-intrinsic" {
|
|||
/// Returns the nearest integer to an `f64`. Rounds half-way cases away from zero.
|
||||
pub fn roundf64(x: f64) -> f64;
|
||||
|
||||
/// Float addition that allows optimizations based on algebraic rules.
|
||||
/// May assume inputs are finite.
/// `T` must be a basic float type (`f32` or `f64`); any other type is reported
/// as an invalid monomorphization when the intrinsic call is translated.
|
||||
#[cfg(not(stage0))]
|
||||
pub fn fadd_fast<T>(a: T, b: T) -> T;
|
||||
|
||||
/// Float subtraction that allows optimizations based on algebraic rules.
|
||||
/// May assume inputs are finite.
/// `T` must be a basic float type (`f32` or `f64`); any other type is reported
/// as an invalid monomorphization when the intrinsic call is translated.
|
||||
#[cfg(not(stage0))]
|
||||
pub fn fsub_fast<T>(a: T, b: T) -> T;
|
||||
|
||||
/// Float multiplication that allows optimizations based on algebraic rules.
|
||||
/// May assume inputs are finite.
/// `T` must be a basic float type (`f32` or `f64`); any other type is reported
/// as an invalid monomorphization when the intrinsic call is translated.
|
||||
#[cfg(not(stage0))]
|
||||
pub fn fmul_fast<T>(a: T, b: T) -> T;
|
||||
|
||||
/// Float division that allows optimizations based on algebraic rules.
|
||||
/// May assume inputs are finite.
/// `T` must be a basic float type (`f32` or `f64`); any other type is reported
/// as an invalid monomorphization when the intrinsic call is translated.
|
||||
#[cfg(not(stage0))]
|
||||
pub fn fdiv_fast<T>(a: T, b: T) -> T;
|
||||
|
||||
/// Float remainder that allows optimizations based on algebraic rules.
|
||||
/// May assume inputs are finite.
/// `T` must be a basic float type (`f32` or `f64`); any other type is reported
/// as an invalid monomorphization when the intrinsic call is translated.
|
||||
#[cfg(not(stage0))]
|
||||
pub fn frem_fast<T>(a: T, b: T) -> T;
|
||||
|
||||
|
||||
/// Returns the number of bits set in an integer type `T`
|
||||
pub fn ctpop<T>(x: T) -> T;
|
||||
|
||||
|
|
|
@ -1310,6 +1310,7 @@ extern {
|
|||
-> ValueRef;
|
||||
pub fn LLVMBuildNot(B: BuilderRef, V: ValueRef, Name: *const c_char)
|
||||
-> ValueRef;
|
||||
/// Sets the UnsafeAlgebra (`fast`) fast-math flag on `Instr`; implemented by the
/// C++ function of the same name.
pub fn LLVMRustSetHasUnsafeAlgebra(Instr: ValueRef);
|
||||
|
||||
/* Memory */
|
||||
pub fn LLVMBuildAlloca(B: BuilderRef, Ty: TypeRef, Name: *const c_char)
|
||||
|
|
|
@ -221,6 +221,18 @@ pub fn FAdd(cx: Block,
|
|||
B(cx).fadd(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn FAddFast(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
debug_loc: DebugLoc)
|
||||
-> ValueRef {
|
||||
if cx.unreachable.get() {
|
||||
return _Undef(lhs);
|
||||
}
|
||||
debug_loc.apply(cx.fcx);
|
||||
B(cx).fadd_fast(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn Sub(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
|
@ -269,6 +281,18 @@ pub fn FSub(cx: Block,
|
|||
B(cx).fsub(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn FSubFast(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
debug_loc: DebugLoc)
|
||||
-> ValueRef {
|
||||
if cx.unreachable.get() {
|
||||
return _Undef(lhs);
|
||||
}
|
||||
debug_loc.apply(cx.fcx);
|
||||
B(cx).fsub_fast(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn Mul(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
|
@ -317,6 +341,18 @@ pub fn FMul(cx: Block,
|
|||
B(cx).fmul(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn FMulFast(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
debug_loc: DebugLoc)
|
||||
-> ValueRef {
|
||||
if cx.unreachable.get() {
|
||||
return _Undef(lhs);
|
||||
}
|
||||
debug_loc.apply(cx.fcx);
|
||||
B(cx).fmul_fast(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn UDiv(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
|
@ -365,6 +401,18 @@ pub fn FDiv(cx: Block,
|
|||
B(cx).fdiv(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn FDivFast(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
debug_loc: DebugLoc)
|
||||
-> ValueRef {
|
||||
if cx.unreachable.get() {
|
||||
return _Undef(lhs);
|
||||
}
|
||||
debug_loc.apply(cx.fcx);
|
||||
B(cx).fdiv_fast(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn URem(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
|
@ -401,6 +449,18 @@ pub fn FRem(cx: Block,
|
|||
B(cx).frem(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn FRemFast(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
debug_loc: DebugLoc)
|
||||
-> ValueRef {
|
||||
if cx.unreachable.get() {
|
||||
return _Undef(lhs);
|
||||
}
|
||||
debug_loc.apply(cx.fcx);
|
||||
B(cx).frem_fast(lhs, rhs)
|
||||
}
|
||||
|
||||
pub fn Shl(cx: Block,
|
||||
lhs: ValueRef,
|
||||
rhs: ValueRef,
|
||||
|
|
|
@ -226,6 +226,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn fadd_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("fadd");
|
||||
unsafe {
|
||||
let instr = llvm::LLVMBuildFAdd(self.llbuilder, lhs, rhs, noname());
|
||||
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
|
||||
instr
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sub(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("sub");
|
||||
unsafe {
|
||||
|
@ -254,6 +263,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn fsub_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("sub");
|
||||
unsafe {
|
||||
let instr = llvm::LLVMBuildFSub(self.llbuilder, lhs, rhs, noname());
|
||||
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
|
||||
instr
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mul(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("mul");
|
||||
unsafe {
|
||||
|
@ -282,6 +300,16 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn fmul_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("fmul");
|
||||
unsafe {
|
||||
let instr = llvm::LLVMBuildFMul(self.llbuilder, lhs, rhs, noname());
|
||||
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
|
||||
instr
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn udiv(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("udiv");
|
||||
unsafe {
|
||||
|
@ -310,6 +338,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn fdiv_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("fdiv");
|
||||
unsafe {
|
||||
let instr = llvm::LLVMBuildFDiv(self.llbuilder, lhs, rhs, noname());
|
||||
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
|
||||
instr
|
||||
}
|
||||
}
|
||||
|
||||
pub fn urem(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("urem");
|
||||
unsafe {
|
||||
|
@ -331,6 +368,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn frem_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("frem");
|
||||
unsafe {
|
||||
let instr = llvm::LLVMBuildFRem(self.llbuilder, lhs, rhs, noname());
|
||||
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
|
||||
instr
|
||||
}
|
||||
}
|
||||
|
||||
pub fn shl(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
|
||||
self.count_insn("shl");
|
||||
unsafe {
|
||||
|
|
|
@ -658,6 +658,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
}
|
||||
|
||||
},
|
||||
(_, "fadd_fast") | (_, "fsub_fast") | (_, "fmul_fast") | (_, "fdiv_fast") |
|
||||
(_, "frem_fast") => {
|
||||
let sty = &arg_tys[0].sty;
|
||||
match float_type_width(sty) {
|
||||
Some(_width) =>
|
||||
match &*name {
|
||||
"fadd_fast" => FAddFast(bcx, llargs[0], llargs[1], call_debug_location),
|
||||
"fsub_fast" => FSubFast(bcx, llargs[0], llargs[1], call_debug_location),
|
||||
"fmul_fast" => FMulFast(bcx, llargs[0], llargs[1], call_debug_location),
|
||||
"fdiv_fast" => FDivFast(bcx, llargs[0], llargs[1], call_debug_location),
|
||||
"frem_fast" => FRemFast(bcx, llargs[0], llargs[1], call_debug_location),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
None => {
|
||||
span_invalid_monomorphization_error(
|
||||
tcx.sess, span,
|
||||
&format!("invalid monomorphization of `{}` intrinsic: \
|
||||
expected basic float type, found `{}`", name, sty));
|
||||
C_null(llret_ty)
|
||||
}
|
||||
}
|
||||
|
||||
},
|
||||
|
||||
|
||||
(_, "return_address") => {
|
||||
|
@ -1700,3 +1723,17 @@ fn int_type_width_signed<'tcx>(sty: &ty::TypeVariants<'tcx>, ccx: &CrateContext)
|
|||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the width of a float TypeVariant
|
||||
// Returns None if the type is not a float
|
||||
fn float_type_width<'tcx>(sty: &ty::TypeVariants<'tcx>)
|
||||
-> Option<u64> {
|
||||
use rustc::middle::ty::TyFloat;
|
||||
match *sty {
|
||||
TyFloat(t) => Some(match t {
|
||||
ast::FloatTy::F32 => 32,
|
||||
ast::FloatTy::F64 => 64,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -280,6 +280,8 @@ pub fn check_intrinsic_type(ccx: &CrateCtxt, it: &hir::ForeignItem) {
|
|||
|
||||
"overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
|
||||
(1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
|
||||
"fadd_fast" | "fsub_fast" | "fmul_fast" | "fdiv_fast" | "frem_fast" =>
|
||||
(1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
|
||||
|
||||
"return_address" => (0, vec![], tcx.mk_imm_ptr(tcx.types.u8)),
|
||||
|
||||
|
|
|
@ -164,6 +164,11 @@ extern "C" void LLVMRemoveFunctionAttrString(LLVMValueRef fn, unsigned index, co
|
|||
to_remove));
|
||||
}
|
||||
|
||||
// enable fpmath flag UnsafeAlgebra
|
||||
extern "C" void LLVMRustSetHasUnsafeAlgebra(LLVMValueRef Instr) {
|
||||
unwrap<Instruction>(Instr)->setHasUnsafeAlgebra(true);
|
||||
}
|
||||
|
||||
extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B,
|
||||
LLVMValueRef source,
|
||||
const char* Name,
|
||||
|
|
60
src/test/codegen/float_math.rs
Normal file
60
src/test/codegen/float_math.rs
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// compile-flags: -C no-prepopulate-passes
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![feature(core_intrinsics)]
|
||||
|
||||
use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
|
||||
|
||||
// Baseline: the ordinary `+` operator must lower to a plain float add that
// does not carry the fast-math flag.
// CHECK-LABEL: @add
#[no_mangle]
pub fn add(x: f32, y: f32) -> f32 {
// CHECK: fadd float
// CHECK-NOT: fast
    x + y
}
|
||||
|
||||
// CHECK-LABEL: @addition
|
||||
#[no_mangle]
|
||||
pub fn addition(x: f32, y: f32) -> f32 {
// Calls the `fadd_fast` intrinsic on two `f32` operands; the directive below
// expects the emitted float add to carry the `fast` flag.
|
||||
// CHECK: fadd fast float
|
||||
unsafe {
|
||||
fadd_fast(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @subtraction
|
||||
#[no_mangle]
|
||||
pub fn subtraction(x: f32, y: f32) -> f32 {
// Calls the `fsub_fast` intrinsic on two `f32` operands; the directive below
// expects the emitted float subtract to carry the `fast` flag.
|
||||
// CHECK: fsub fast float
|
||||
unsafe {
|
||||
fsub_fast(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @multiplication
|
||||
#[no_mangle]
|
||||
pub fn multiplication(x: f32, y: f32) -> f32 {
// Calls the `fmul_fast` intrinsic on two `f32` operands; the directive below
// expects the emitted float multiply to carry the `fast` flag.
|
||||
// CHECK: fmul fast float
|
||||
unsafe {
|
||||
fmul_fast(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @division
|
||||
#[no_mangle]
|
||||
pub fn division(x: f32, y: f32) -> f32 {
// Calls the `fdiv_fast` intrinsic on two `f32` operands; the directive below
// expects the emitted float divide to carry the `fast` flag.
|
||||
// CHECK: fdiv fast float
|
||||
unsafe {
|
||||
fdiv_fast(x, y)
|
||||
}
|
||||
}
|
24
src/test/run-pass/float_math.rs
Normal file
24
src/test/run-pass/float_math.rs
Normal file
|
@ -0,0 +1,24 @@
|
|||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(core_intrinsics)]
|
||||
|
||||
use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
|
||||
|
||||
fn main() {
|
||||
// make sure they all map to the correct operation
|
||||
unsafe {
|
||||
assert_eq!(fadd_fast(1., 2.), 1. + 2.);
|
||||
assert_eq!(fsub_fast(1., 2.), 1. - 2.);
|
||||
assert_eq!(fmul_fast(2., 3.), 2. * 3.);
|
||||
assert_eq!(fdiv_fast(10., 5.), 10. / 5.);
|
||||
assert_eq!(frem_fast(10., 5.), 10. % 5.);
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue