diff --git a/src/builder.rs b/src/builder.rs index 82b0e64e582..df5c29f625e 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -48,6 +48,7 @@ use rustc_target::spec::{HasTargetSpec, Target}; use crate::common::{SignType, TypeReflection, type_is_pointer}; use crate::context::CodegenCx; +use crate::intrinsic::llvm; use crate::type_of::LayoutGccExt; // TODO(antoyo) @@ -224,18 +225,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { .zip(args.iter()) .enumerate() .map(|(index, (expected_ty, &actual_val))| { - // NOTE: these intrinsics have missing parameters before the last one, so ignore the - // last argument type check. - // FIXME(antoyo): find a way to refactor in order to avoid this hack. - match &*func_name { - "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask" - | "__builtin_ia32_sqrtpd512_mask" => { - if index == args.len() - 1 { - return actual_val; - } - }, - _ => (), + if llvm::ignore_arg_cast(&func_name, index, args.len()) { + return actual_val; } let actual_ty = actual_val.get_type(); @@ -302,7 +293,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> { - let mut args = self.check_ptr_call("call", func_ptr, args); + let args = self.check_ptr_call("call", func_ptr, args); // gccjit requires to use the result of functions, even when it's not used. // That's why we assign the result to a local or call add_eval(). @@ -314,92 +305,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { if return_type != void_type { unsafe { RETURN_VALUE_COUNT += 1 }; let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT })); - // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing - // arguments here. - if gcc_func.get_param_count() != args.len() { - let func_name = format!("{:?}", func_ptr); - match &*func_name { - "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask" - // FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs. - | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask" - | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask" - | "__builtin_ia32_pmaxuq128_mask" - | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask" - | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" - | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask" - | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" - => { - // TODO: refactor by separating those intrinsics outside of this branch. - let add_before_last_arg = - match &*func_name { - "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" - | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true, - _ => false, - }; - let new_first_arg_is_zero = - match &*func_name { - "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask" - | "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true, - _ => false - }; - let arg3_index = - match &*func_name { - "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1, - _ => 2, - }; - let mut new_args = args.to_vec(); - let arg3_type = gcc_func.get_param_type(arg3_index); - let first_arg = - if new_first_arg_is_zero { - let vector_type = arg3_type.dyncast_vector().expect("vector type"); - let zero = self.context.new_rvalue_zero(vector_type.get_element_type()); - let num_units = vector_type.get_num_units(); - self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]) - } - else { - self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue() - }; - if add_before_last_arg { - new_args.insert(new_args.len() - 1, first_arg); - } - else { - new_args.push(first_arg); - } - let arg4_index = - match &*func_name { - "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2, - _ => 3, - }; - let arg4_type = gcc_func.get_param_type(arg4_index); - let minus_one = self.context.new_rvalue_from_int(arg4_type, -1); - if add_before_last_arg { - new_args.insert(new_args.len() - 1, minus_one); - } - else { - new_args.push(minus_one); - } - args = new_args.into(); - }, - "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { - let mut new_args = args.to_vec(); - if args.len() == 3 { - // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 maps to - // the same GCC intrinsic, but the former has 3 parameters and the - // latter has 4 so it doesn't require this additional argument. - let arg4_type = gcc_func.get_param_type(3); - let minus_one = self.context.new_rvalue_from_int(arg4_type, -1); - new_args.push(minus_one); - } - - let arg5_type = gcc_func.get_param_type(4); - new_args.push(self.context.new_rvalue_from_int(arg5_type, 4)); - args = new_args.into(); - }, - _ => (), - } - } + let func_name = format!("{:?}", func_ptr); + let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name); self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args)); result.to_rvalue() } @@ -1514,11 +1421,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b)) } - pub fn vector_reduce_fadd_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> { + pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } - pub fn vector_reduce_fmul_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> { + pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } @@ -1553,6 +1460,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let ones = vec![self.context.new_rvalue_one(element_type); num_units]; let ones = self.context.new_rvalue_from_vector(None, cond_type, &ones); let inverted_masks = masks + ones; + // NOTE: sometimes, the type of else_val can be different than the type of then_val in + // libgccjit (vector of int vs vector of int32_t), but they should be the same for the AND + // operation to work. + let else_val = self.context.new_bitcast(None, else_val, then_val.get_type()); let else_vals = inverted_masks & else_val; then_vals | else_vals diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 5c836736083..1175ea00547 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -1,6 +1,169 @@ -use gccjit::Function; +use std::borrow::Cow; -use crate::context::CodegenCx; +use gccjit::{Function, FunctionPtrType, RValue, ToRValue}; + +use crate::{context::CodegenCx, builder::Builder}; + +pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> { + // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing + // arguments here. + if gcc_func.get_param_count() != args.len() { + match &*func_name { + "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask" + // FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs. + | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask" + | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask" + | "__builtin_ia32_pmaxuq128_mask" + | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask" + | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" + | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask" + | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" + => { + // TODO: refactor by separating those intrinsics outside of this branch. + let add_before_last_arg = + match &*func_name { + "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" + | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true, + _ => false, + }; + let new_first_arg_is_zero = + match &*func_name { + "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask" + | "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true, + _ => false + }; + let arg3_index = + match &*func_name { + "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1, + _ => 2, + }; + let mut new_args = args.to_vec(); + let arg3_type = gcc_func.get_param_type(arg3_index); + let first_arg = + if new_first_arg_is_zero { + let vector_type = arg3_type.dyncast_vector().expect("vector type"); + let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); + let num_units = vector_type.get_num_units(); + builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]) + } + else { + builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue() + }; + if add_before_last_arg { + new_args.insert(new_args.len() - 1, first_arg); + } + else { + new_args.push(first_arg); + } + let arg4_index = + match &*func_name { + "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2, + _ => 3, + }; + let arg4_type = gcc_func.get_param_type(arg4_index); + let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); + if add_before_last_arg { + new_args.insert(new_args.len() - 1, minus_one); + } + else { + new_args.push(minus_one); + } + args = new_args.into(); + }, + "__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask" + | "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask" + | "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => { + let mut new_args = args.to_vec(); + let arg5_type = gcc_func.get_param_type(4); + let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); + new_args.push(minus_one); + args = new_args.into(); + }, + "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { + let mut new_args = args.to_vec(); + + let mut last_arg = None; + if args.len() == 4 { + last_arg = new_args.pop(); + } + + let arg4_type = gcc_func.get_param_type(3); + let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); + new_args.push(minus_one); + + if args.len() == 3 { + // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to + // the same GCC intrinsic, but the former has 3 parameters and the + // latter has 4 so it doesn't require this additional argument. + let arg5_type = gcc_func.get_param_type(4); + new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4)); + } + + if let Some(last_arg) = last_arg { + new_args.push(last_arg); + } + + args = new_args.into(); + }, + "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" + | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" + | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" + | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => { + let mut new_args = args.to_vec(); + let last_arg = new_args.pop().expect("last arg"); + let arg4_type = gcc_func.get_param_type(3); + let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); + new_args.push(minus_one); + new_args.push(last_arg); + args = new_args.into(); + }, + "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" => { + let mut new_args = args.to_vec(); + let last_arg = new_args.pop().expect("last arg"); + let arg3_type = gcc_func.get_param_type(2); + let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue(); + new_args.push(undefined); + let arg4_type = gcc_func.get_param_type(3); + let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); + new_args.push(minus_one); + new_args.push(last_arg); + args = new_args.into(); + }, + _ => (), + } + } + + args +} + +pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool { + // NOTE: these intrinsics have missing parameters before the last one, so ignore the + // last argument type check. + // FIXME(antoyo): find a way to refactor in order to avoid this hack. + match func_name { + "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask" + | "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" + | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" + | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" + | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" + | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => { + if index == args_len - 1 { + return true; + } + }, + "__builtin_ia32_vfmaddps512_mask" => { + if args_len == 4 && index == args_len - 1 { + return true; + } + }, + _ => (), + } + + false +} #[cfg(not(feature="master"))] pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { @@ -37,29 +200,23 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask", "llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask", "llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask", - "llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask", - "llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask", - "llvm.x86.avx512.rcp14.ps.256" => "__builtin_ia32_rcp14ps256_mask", - "llvm.x86.avx512.rcp14.ps.128" => "__builtin_ia32_rcp14ps128_mask", - "llvm.x86.avx512.rcp14.pd.256" => "__builtin_ia32_rcp14pd256_mask", - "llvm.x86.avx512.rcp14.pd.128" => "__builtin_ia32_rcp14pd128_mask", - "llvm.x86.avx512.rsqrt14.ps.256" => "__builtin_ia32_rsqrt14ps256_mask", - "llvm.x86.avx512.rsqrt14.ps.128" => "__builtin_ia32_rsqrt14ps128_mask", - "llvm.x86.avx512.rsqrt14.pd.256" => "__builtin_ia32_rsqrt14pd256_mask", - "llvm.x86.avx512.rsqrt14.pd.128" => "__builtin_ia32_rsqrt14pd128_mask", - "llvm.x86.avx512.mask.getexp.ps.512" => "__builtin_ia32_getexpps512_mask", - "llvm.x86.avx512.mask.getexp.ps.256" => "__builtin_ia32_getexpps256_mask", - "llvm.x86.avx512.mask.getexp.ps.128" => "__builtin_ia32_getexpps128_mask", - "llvm.x86.avx512.mask.getexp.pd.512" => "__builtin_ia32_getexppd512_mask", - "llvm.x86.avx512.mask.getexp.pd.256" => "__builtin_ia32_getexppd256_mask", - "llvm.x86.avx512.mask.getexp.pd.128" => "__builtin_ia32_getexppd128_mask", - "llvm.x86.avx512.mask.rndscale.ps.256" => "__builtin_ia32_rndscaleps_256_mask", - "llvm.x86.avx512.mask.rndscale.ps.128" => "__builtin_ia32_rndscaleps_128_mask", - "llvm.x86.avx512.mask.rndscale.pd.256" => "__builtin_ia32_rndscalepd_256_mask", - "llvm.x86.avx512.mask.rndscale.pd.128" => "__builtin_ia32_rndscalepd_128_mask", - "llvm.x86.avx512.mask.scalef.ps.512" => "__builtin_ia32_scalefps512_mask", - "llvm.x86.avx512.mask.scalef.ps.256" => "__builtin_ia32_scalefps256_mask", - "llvm.x86.avx512.mask.scalef.ps.128" => "__builtin_ia32_scalefps128_mask", + "llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddsubps512_mask", + "llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddsubpd512_mask", + "llvm.x86.avx512.pternlog.d.512" => "__builtin_ia32_pternlogd512_mask", + "llvm.x86.avx512.pternlog.d.256" => "__builtin_ia32_pternlogd256_mask", + "llvm.x86.avx512.pternlog.d.128" => "__builtin_ia32_pternlogd128_mask", + "llvm.x86.avx512.pternlog.q.512" => "__builtin_ia32_pternlogq512_mask", + "llvm.x86.avx512.pternlog.q.256" => "__builtin_ia32_pternlogq256_mask", + "llvm.x86.avx512.pternlog.q.128" => "__builtin_ia32_pternlogq128_mask", + "llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512_mask", + "llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512_mask", + "llvm.x86.avx512.sub.ps.512" => "__builtin_ia32_subps512_mask", + "llvm.x86.avx512.sub.pd.512" => "__builtin_ia32_subpd512_mask", + "llvm.x86.avx512.mul.ps.512" => "__builtin_ia32_mulps512_mask", + "llvm.x86.avx512.mul.pd.512" => "__builtin_ia32_mulpd512_mask", + "llvm.x86.avx512.div.ps.512" => "__builtin_ia32_divps512_mask", + "llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask", + "llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask", // The above doc points to unknown builtins for the following, so override them: "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",