Add more SIMD
This commit is contained in:
parent
5088fb3d3b
commit
4636c59df5
5 changed files with 102 additions and 30 deletions
|
@ -1343,7 +1343,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
}
|
||||
// TODO(antoyo): switch to using new_vector_access.
|
||||
let array = self.context.new_bitcast(None, v2, array_type);
|
||||
for i in 0..vec_num_units {
|
||||
for i in 0..(mask_num_units - vec_num_units) {
|
||||
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
|
||||
}
|
||||
let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements);
|
||||
|
|
|
@ -27,12 +27,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
|||
}
|
||||
// NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
|
||||
// SIMD builtins require a constant value.
|
||||
if value.get_type() != typ {
|
||||
self.context.new_bitcast(None, value, typ)
|
||||
}
|
||||
else {
|
||||
value
|
||||
}
|
||||
self.bitcast_if_needed(value, typ)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -86,13 +81,7 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
|
|||
|
||||
// TODO(antoyo): set alignment.
|
||||
|
||||
let value =
|
||||
if value.get_type() != gcc_type {
|
||||
self.context.new_bitcast(None, value, gcc_type)
|
||||
}
|
||||
else {
|
||||
value
|
||||
};
|
||||
let value = self.bitcast_if_needed(value, gcc_type);
|
||||
global.global_set_initializer_rvalue(value);
|
||||
|
||||
// As an optimization, all shared statics which do not have interior
|
||||
|
|
|
@ -279,6 +279,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
|
|||
/// Returns the compiler session reached through this context's `tcx`.
pub fn sess(&self) -> &Session {
    let session: &Session = &self.tcx.sess;
    session
}
|
||||
|
||||
/// Returns `value` typed as `expected_type`, bitcasting only when the types differ.
///
/// A value that already has the expected type is handed back untouched: a bitcast
/// makes a value non-constant, and some SIMD builtins require a constant operand.
pub fn bitcast_if_needed(&self, value: RValue<'gcc>, expected_type: Type<'gcc>) -> RValue<'gcc> {
    // Fast path: nothing to convert.
    if value.get_type() == expected_type {
        return value;
    }
    self.context.new_bitcast(None, value, expected_type)
}
|
||||
}
|
||||
|
||||
impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {
|
||||
|
|
|
@ -21,6 +21,25 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
|
||||
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
|
||||
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
|
||||
|
||||
// The above doc points to unknown builtins for the following, so override them:
|
||||
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
|
||||
"llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si",
|
||||
"llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf",
|
||||
"llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf",
|
||||
"llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di",
|
||||
"llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di",
|
||||
"llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df",
|
||||
"llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df",
|
||||
"llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si",
|
||||
"llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256",
|
||||
"llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf",
|
||||
"llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256",
|
||||
"llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di",
|
||||
"llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
|
||||
"llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
|
||||
"llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
|
||||
"" => "",
|
||||
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
|
||||
_ => include!("archs.rs"),
|
||||
};
|
||||
|
|
|
@ -202,14 +202,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
};
|
||||
let builtin = bx.context.get_target_builtin_function(func_name);
|
||||
let param1_type = builtin.get_param(0).to_rvalue().get_type();
|
||||
let vector =
|
||||
if vector.get_type() != param1_type {
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
bx.context.new_bitcast(None, vector, param1_type)
|
||||
}
|
||||
else {
|
||||
vector
|
||||
};
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
|
||||
let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]);
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
|
||||
|
@ -539,18 +533,79 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
|
||||
|
||||
let func = bx.context.get_target_builtin_function(builtin_name);
|
||||
let param1_type = func.get_parameter(0).get_type();
|
||||
let lhs =
|
||||
if lhs.get_type() != param1_type {
|
||||
bx.context.new_bitcast(None, lhs, param1_type)
|
||||
}
|
||||
else {
|
||||
lhs
|
||||
};
|
||||
let param1_type = func.get_param(0).to_rvalue().get_type();
|
||||
let param2_type = func.get_param(1).to_rvalue().get_type();
|
||||
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
|
||||
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
|
||||
let result = bx.context.new_call(None, func, &[lhs, rhs]);
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
return Ok(bx.context.new_bitcast(None, result, vec_ty));
|
||||
}
|
||||
|
||||
// Generates the handler for one arithmetic reduction intrinsic.
//
// Arguments:
// - `$name`: the `sym::` identifier compared against `name`.
// - `$integer_reduce` / `$float_reduce`: builder methods invoked for integer and
//   float element types respectively.
// - `$ordered`: when true, `args[1]` is used as the accumulator.
// - `$op`: builder method combining the accumulator with the integer reduction.
// - `$identity`: accumulator constant used for the unordered float reduction.
//
// When `name` matches, the expansion returns from the enclosing function with the
// reduced value or an error for unsupported element types.
macro_rules! arith_red {
    ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
     $identity:expr) => {
        if name == sym::$name {
            require!(
                ret_ty == in_elem,
                "expected return type `{}` (element of input `{}`), found `{}`",
                in_elem,
                in_ty,
                ret_ty
            );
            return match in_elem.kind() {
                ty::Int(_) | ty::Uint(_) => {
                    // Emit the reduction exactly once; both branches reuse `r`.
                    let r = bx.$integer_reduce(args[0].immediate());
                    if $ordered {
                        // if overflow occurs, the result is the
                        // mathematical result modulo 2^n:
                        Ok(bx.$op(args[1].immediate(), r))
                    } else {
                        // Previously the reduction was rebuilt here, leaving `r` unused
                        // and emitting the same reduction a second time.
                        Ok(r)
                    }
                }
                ty::Float(f) => {
                    let acc = if $ordered {
                        // ordered arithmetic reductions take an accumulator
                        args[1].immediate()
                    } else {
                        // unordered arithmetic reductions use the identity accumulator
                        match f.bit_width() {
                            32 => bx.const_real(bx.type_f32(), $identity),
                            64 => bx.const_real(bx.type_f64(), $identity),
                            v => return_error!(
                                r#"
unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
                                sym::$name,
                                in_ty,
                                in_elem,
                                v,
                                ret_ty
                            ),
                        }
                    };
                    Ok(bx.$float_reduce(acc, args[0].immediate()))
                }
                _ => return_error!(
                    "unsupported {} from `{}` with element `{}` to `{}`",
                    sym::$name,
                    in_ty,
                    in_elem,
                    ret_ty
                ),
            };
        }
    };
}
|
||||
|
||||
// TODO: use a recursive algorithm a-la Hacker's Delight.
|
||||
arith_red!(
|
||||
simd_reduce_add_unordered: vector_reduce_add,
|
||||
vector_reduce_fadd_fast,
|
||||
false,
|
||||
add,
|
||||
0.0
|
||||
);
|
||||
|
||||
unimplemented!("simd {}", name);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue