Support more SIMD intrinsics and refactor argument adjustment
This commit is contained in:
parent
eba654c57a
commit
4b40ac790d
2 changed files with 194 additions and 126 deletions
113
src/builder.rs
113
src/builder.rs
|
@ -48,6 +48,7 @@ use rustc_target::spec::{HasTargetSpec, Target};
|
|||
|
||||
use crate::common::{SignType, TypeReflection, type_is_pointer};
|
||||
use crate::context::CodegenCx;
|
||||
use crate::intrinsic::llvm;
|
||||
use crate::type_of::LayoutGccExt;
|
||||
|
||||
// TODO(antoyo)
|
||||
|
@ -224,18 +225,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
.zip(args.iter())
|
||||
.enumerate()
|
||||
.map(|(index, (expected_ty, &actual_val))| {
|
||||
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
|
||||
// last argument type check.
|
||||
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
|
||||
match &*func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
|
||||
| "__builtin_ia32_sqrtpd512_mask" => {
|
||||
if index == args.len() - 1 {
|
||||
return actual_val;
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
if llvm::ignore_arg_cast(&func_name, index, args.len()) {
|
||||
return actual_val;
|
||||
}
|
||||
|
||||
let actual_ty = actual_val.get_type();
|
||||
|
@ -302,7 +293,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
}
|
||||
|
||||
fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
|
||||
let mut args = self.check_ptr_call("call", func_ptr, args);
|
||||
let args = self.check_ptr_call("call", func_ptr, args);
|
||||
|
||||
// gccjit requires to use the result of functions, even when it's not used.
|
||||
// That's why we assign the result to a local or call add_eval().
|
||||
|
@ -314,92 +305,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
if return_type != void_type {
|
||||
unsafe { RETURN_VALUE_COUNT += 1 };
|
||||
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
|
||||
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
|
||||
// arguments here.
|
||||
if gcc_func.get_param_count() != args.len() {
|
||||
let func_name = format!("{:?}", func_ptr);
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
|
||||
// FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
|
||||
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
|
||||
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
|
||||
| "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
|
||||
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
|
||||
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
|
||||
=> {
|
||||
// TODO: refactor by separating those intrinsics outside of this branch.
|
||||
let add_before_last_arg =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
|
||||
_ => false,
|
||||
};
|
||||
let new_first_arg_is_zero =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
|
||||
_ => false
|
||||
};
|
||||
let arg3_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
|
||||
_ => 2,
|
||||
};
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(arg3_index);
|
||||
let first_arg =
|
||||
if new_first_arg_is_zero {
|
||||
let vector_type = arg3_type.dyncast_vector().expect("vector type");
|
||||
let zero = self.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
|
||||
}
|
||||
else {
|
||||
self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
|
||||
};
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, first_arg);
|
||||
}
|
||||
else {
|
||||
new_args.push(first_arg);
|
||||
}
|
||||
let arg4_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
|
||||
_ => 3,
|
||||
};
|
||||
let arg4_type = gcc_func.get_param_type(arg4_index);
|
||||
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, minus_one);
|
||||
}
|
||||
else {
|
||||
new_args.push(minus_one);
|
||||
}
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
}
|
||||
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
new_args.push(self.context.new_rvalue_from_int(arg5_type, 4));
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
let func_name = format!("{:?}", func_ptr);
|
||||
let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name);
|
||||
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
|
||||
result.to_rvalue()
|
||||
}
|
||||
|
@ -1514,11 +1421,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
|
||||
}
|
||||
|
||||
pub fn vector_reduce_fadd_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
pub fn vector_reduce_fmul_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
|
@ -1553,6 +1460,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
let ones = vec![self.context.new_rvalue_one(element_type); num_units];
|
||||
let ones = self.context.new_rvalue_from_vector(None, cond_type, &ones);
|
||||
let inverted_masks = masks + ones;
|
||||
// NOTE: sometimes, the type of else_val can be different than the type of then_val in
|
||||
// libgccjit (vector of int vs vector of int32_t), but they should be the same for the AND
|
||||
// operation to work.
|
||||
let else_val = self.context.new_bitcast(None, else_val, then_val.get_type());
|
||||
let else_vals = inverted_masks & else_val;
|
||||
|
||||
then_vals | else_vals
|
||||
|
|
|
@ -1,6 +1,169 @@
|
|||
use gccjit::Function;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::context::CodegenCx;
|
||||
use gccjit::{Function, FunctionPtrType, RValue, ToRValue};
|
||||
|
||||
use crate::{context::CodegenCx, builder::Builder};
|
||||
|
||||
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> {
|
||||
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
|
||||
// arguments here.
|
||||
if gcc_func.get_param_count() != args.len() {
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
|
||||
// FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
|
||||
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
|
||||
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
|
||||
| "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
|
||||
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
|
||||
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
|
||||
=> {
|
||||
// TODO: refactor by separating those intrinsics outside of this branch.
|
||||
let add_before_last_arg =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
|
||||
_ => false,
|
||||
};
|
||||
let new_first_arg_is_zero =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
|
||||
_ => false
|
||||
};
|
||||
let arg3_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
|
||||
_ => 2,
|
||||
};
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(arg3_index);
|
||||
let first_arg =
|
||||
if new_first_arg_is_zero {
|
||||
let vector_type = arg3_type.dyncast_vector().expect("vector type");
|
||||
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
|
||||
}
|
||||
else {
|
||||
builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
|
||||
};
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, first_arg);
|
||||
}
|
||||
else {
|
||||
new_args.push(first_arg);
|
||||
}
|
||||
let arg4_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
|
||||
_ => 3,
|
||||
};
|
||||
let arg4_type = gcc_func.get_param_type(arg4_index);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, minus_one);
|
||||
}
|
||||
else {
|
||||
new_args.push(minus_one);
|
||||
}
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
|
||||
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
|
||||
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
|
||||
let mut last_arg = None;
|
||||
if args.len() == 4 {
|
||||
last_arg = new_args.pop();
|
||||
}
|
||||
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
|
||||
}
|
||||
|
||||
if let Some(last_arg) = last_arg {
|
||||
new_args.push(last_arg);
|
||||
}
|
||||
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
|
||||
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue();
|
||||
new_args.push(undefined);
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
args
|
||||
}
|
||||
|
||||
pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
|
||||
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
|
||||
// last argument type check.
|
||||
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
|
||||
match func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
|
||||
| "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
|
||||
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
|
||||
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
|
||||
if index == args_len - 1 {
|
||||
return true;
|
||||
}
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" => {
|
||||
if args_len == 4 && index == args_len - 1 {
|
||||
return true;
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
|
||||
|
@ -37,29 +200,23 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
|
||||
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
"llvm.x86.avx512.rcp14.ps.256" => "__builtin_ia32_rcp14ps256_mask",
|
||||
"llvm.x86.avx512.rcp14.ps.128" => "__builtin_ia32_rcp14ps128_mask",
|
||||
"llvm.x86.avx512.rcp14.pd.256" => "__builtin_ia32_rcp14pd256_mask",
|
||||
"llvm.x86.avx512.rcp14.pd.128" => "__builtin_ia32_rcp14pd128_mask",
|
||||
"llvm.x86.avx512.rsqrt14.ps.256" => "__builtin_ia32_rsqrt14ps256_mask",
|
||||
"llvm.x86.avx512.rsqrt14.ps.128" => "__builtin_ia32_rsqrt14ps128_mask",
|
||||
"llvm.x86.avx512.rsqrt14.pd.256" => "__builtin_ia32_rsqrt14pd256_mask",
|
||||
"llvm.x86.avx512.rsqrt14.pd.128" => "__builtin_ia32_rsqrt14pd128_mask",
|
||||
"llvm.x86.avx512.mask.getexp.ps.512" => "__builtin_ia32_getexpps512_mask",
|
||||
"llvm.x86.avx512.mask.getexp.ps.256" => "__builtin_ia32_getexpps256_mask",
|
||||
"llvm.x86.avx512.mask.getexp.ps.128" => "__builtin_ia32_getexpps128_mask",
|
||||
"llvm.x86.avx512.mask.getexp.pd.512" => "__builtin_ia32_getexppd512_mask",
|
||||
"llvm.x86.avx512.mask.getexp.pd.256" => "__builtin_ia32_getexppd256_mask",
|
||||
"llvm.x86.avx512.mask.getexp.pd.128" => "__builtin_ia32_getexppd128_mask",
|
||||
"llvm.x86.avx512.mask.rndscale.ps.256" => "__builtin_ia32_rndscaleps_256_mask",
|
||||
"llvm.x86.avx512.mask.rndscale.ps.128" => "__builtin_ia32_rndscaleps_128_mask",
|
||||
"llvm.x86.avx512.mask.rndscale.pd.256" => "__builtin_ia32_rndscalepd_256_mask",
|
||||
"llvm.x86.avx512.mask.rndscale.pd.128" => "__builtin_ia32_rndscalepd_128_mask",
|
||||
"llvm.x86.avx512.mask.scalef.ps.512" => "__builtin_ia32_scalefps512_mask",
|
||||
"llvm.x86.avx512.mask.scalef.ps.256" => "__builtin_ia32_scalefps256_mask",
|
||||
"llvm.x86.avx512.mask.scalef.ps.128" => "__builtin_ia32_scalefps128_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddsubps512_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddsubpd512_mask",
|
||||
"llvm.x86.avx512.pternlog.d.512" => "__builtin_ia32_pternlogd512_mask",
|
||||
"llvm.x86.avx512.pternlog.d.256" => "__builtin_ia32_pternlogd256_mask",
|
||||
"llvm.x86.avx512.pternlog.d.128" => "__builtin_ia32_pternlogd128_mask",
|
||||
"llvm.x86.avx512.pternlog.q.512" => "__builtin_ia32_pternlogq512_mask",
|
||||
"llvm.x86.avx512.pternlog.q.256" => "__builtin_ia32_pternlogq256_mask",
|
||||
"llvm.x86.avx512.pternlog.q.128" => "__builtin_ia32_pternlogq128_mask",
|
||||
"llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512_mask",
|
||||
"llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512_mask",
|
||||
"llvm.x86.avx512.sub.ps.512" => "__builtin_ia32_subps512_mask",
|
||||
"llvm.x86.avx512.sub.pd.512" => "__builtin_ia32_subpd512_mask",
|
||||
"llvm.x86.avx512.mul.ps.512" => "__builtin_ia32_mulps512_mask",
|
||||
"llvm.x86.avx512.mul.pd.512" => "__builtin_ia32_mulpd512_mask",
|
||||
"llvm.x86.avx512.div.ps.512" => "__builtin_ia32_divps512_mask",
|
||||
"llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
|
||||
"llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
|
||||
|
||||
// The above doc points to unknown builtins for the following, so override them:
|
||||
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
|
||||
|
|
Loading…
Add table
Reference in a new issue