Add more SIMD

This commit is contained in:
Antoni Boucher 2022-04-29 23:14:26 -04:00
parent 5088fb3d3b
commit 4636c59df5
5 changed files with 102 additions and 30 deletions

View file

@ -1343,7 +1343,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
}
// TODO(antoyo): switch to using new_vector_access.
let array = self.context.new_bitcast(None, v2, array_type);
for i in 0..vec_num_units {
for i in 0..(mask_num_units - vec_num_units) {
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
}
let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements);

View file

@ -27,12 +27,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
}
// NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
// SIMD builtins require a constant value.
if value.get_type() != typ {
self.context.new_bitcast(None, value, typ)
}
else {
value
}
self.bitcast_if_needed(value, typ)
}
}
@ -86,13 +81,7 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
// TODO(antoyo): set alignment.
let value =
if value.get_type() != gcc_type {
self.context.new_bitcast(None, value, gcc_type)
}
else {
value
};
let value = self.bitcast_if_needed(value, gcc_type);
global.global_set_initializer_rvalue(value);
// As an optimization, all shared statics which do not have interior

View file

@ -279,6 +279,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
pub fn sess(&self) -> &Session {
&self.tcx.sess
}
pub fn bitcast_if_needed(&self, value: RValue<'gcc>, expected_type: Type<'gcc>) -> RValue<'gcc> {
if value.get_type() != expected_type {
self.context.new_bitcast(None, value, expected_type)
}
else {
value
}
}
}
impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {

View file

@ -21,6 +21,25 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
// The above doc points to unknown builtins for the following, so override them:
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
"llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si",
"llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf",
"llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf",
"llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di",
"llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di",
"llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df",
"llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df",
"llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si",
"llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256",
"llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf",
"llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256",
"llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di",
"llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
"llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
"llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
"" => "",
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
_ => include!("archs.rs"),
};

View file

@ -202,14 +202,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
};
let builtin = bx.context.get_target_builtin_function(func_name);
let param1_type = builtin.get_param(0).to_rvalue().get_type();
let vector =
if vector.get_type() != param1_type {
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
bx.context.new_bitcast(None, vector, param1_type)
}
else {
vector
};
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
@ -539,18 +533,79 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
let func = bx.context.get_target_builtin_function(builtin_name);
let param1_type = func.get_parameter(0).get_type();
let lhs =
if lhs.get_type() != param1_type {
bx.context.new_bitcast(None, lhs, param1_type)
}
else {
lhs
};
let param1_type = func.get_param(0).to_rvalue().get_type();
let param2_type = func.get_param(1).to_rvalue().get_type();
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
let result = bx.context.new_call(None, func, &[lhs, rhs]);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
return Ok(bx.context.new_bitcast(None, result, vec_ty));
}
macro_rules! arith_red {
($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
$identity:expr) => {
if name == sym::$name {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {
let r = bx.$integer_reduce(args[0].immediate());
if $ordered {
// if overflow occurs, the result is the
// mathematical result modulo 2^n:
Ok(bx.$op(args[1].immediate(), r))
} else {
Ok(bx.$integer_reduce(args[0].immediate()))
}
}
ty::Float(f) => {
let acc = if $ordered {
// ordered arithmetic reductions take an accumulator
args[1].immediate()
} else {
// unordered arithmetic reductions use the identity accumulator
match f.bit_width() {
32 => bx.const_real(bx.type_f32(), $identity),
64 => bx.const_real(bx.type_f64(), $identity),
v => return_error!(
r#"
unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
sym::$name,
in_ty,
in_elem,
v,
ret_ty
),
}
};
Ok(bx.$float_reduce(acc, args[0].immediate()))
}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
// TODO: use a recursive algorithm a-la Hacker's Delight.
arith_red!(
simd_reduce_add_unordered: vector_reduce_add,
vector_reduce_fadd_fast,
false,
add,
0.0
);
unimplemented!("simd {}", name);
}