Rollup merge of #125980 - kjetilkjeka:nvptx_remove_direct_passmode, r=davidtwco

Nvptx remove direct passmode

This PR does what should have been done in #117671. That is fully avoid using the `PassMode::Direct` for `extern "C" fn` for `nvptx64-nvidia-cuda` and enable the compatibility test. `@RalfJung` [pointed me in the right direction](https://github.com/rust-lang/rust/issues/117480#issuecomment-2137712501) for solving this issue.

There are still some ABI bugs after this PR is merged. These ABI tests are created based on what is actually correct, and since they continue passing with even more of them enabled things are improving. I don't have the time to tackle all the remaining issues right now, but I think getting these improvements merged is very valuable in themselves and plan to tackle more of them long term.

This also doesn't remove the use of `PassMode::Direct` for `extern "ptx-kernel" fn`. This was also not trivial to make work. And since the ABI is hidden behind an unstable feature it's less urgent.

I don't know if it's correct to request `@RalfJung` as a reviewer (due to team structures), but he helped me a lot to figure out this stuff. If that's not appropriate then `@davidtwco` would be a good candidate since he know about this topic from #117671

r​? `@RalfJung`
This commit is contained in:
Jubilee 2024-06-12 03:57:20 -07:00 committed by GitHub
commit 322af5c274
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 73 additions and 16 deletions

View file

@ -1,23 +1,54 @@
use crate::abi::call::{ArgAbi, FnAbi, PassMode, Reg, Size, Uniform};
use crate::abi::{HasDataLayout, TyAbiInterface};
use super::{ArgAttribute, ArgAttributes, ArgExtension, CastTarget};
fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
if ret.layout.is_aggregate() && ret.layout.size.bits() > 64 {
ret.make_indirect();
} else {
// FIXME: this is wrong! Need to decide which ABI we really want here.
ret.make_direct_deprecated();
if ret.layout.is_aggregate() && ret.layout.is_sized() {
classify_aggregate(ret)
} else if ret.layout.size.bits() < 32 && ret.layout.is_sized() {
ret.extend_integer_width_to(32);
}
}
fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
if arg.layout.is_aggregate() {
arg.make_indirect_byval(None);
} else if arg.layout.size.bits() < 32 {
if arg.layout.is_aggregate() && arg.layout.is_sized() {
classify_aggregate(arg)
} else if arg.layout.size.bits() < 32 && arg.layout.is_sized() {
arg.extend_integer_width_to(32);
}
}
/// the pass mode used for aggregates in arg and ret position
fn classify_aggregate<Ty>(arg: &mut ArgAbi<'_, Ty>) {
let align_bytes = arg.layout.align.abi.bytes();
let size = arg.layout.size;
let reg = match align_bytes {
1 => Reg::i8(),
2 => Reg::i16(),
4 => Reg::i32(),
8 => Reg::i64(),
16 => Reg::i128(),
_ => unreachable!("Align is given as power of 2 no larger than 16 bytes"),
};
if align_bytes == size.bytes() {
arg.cast_to(CastTarget {
prefix: [Some(reg), None, None, None, None, None, None, None],
rest: Uniform::new(Reg::i8(), Size::from_bytes(0)),
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
});
} else {
arg.cast_to(Uniform::new(reg, size));
}
}
fn classify_arg_kernel<'a, Ty, C>(_cx: &C, arg: &mut ArgAbi<'a, Ty>)
where
Ty: TyAbiInterface<'a, C> + Copy,

View file

@ -40,6 +40,11 @@ pub struct TripleU16 {
h: u16,
}
#[repr(C)]
pub struct DoubleI32 {
f: i32,
g: i32,
}
#[repr(C)]
pub struct TripleU32 {
f: u32,
g: u32,
@ -175,6 +180,11 @@ pub unsafe extern "C" fn f_triple_u16_arg(_a: TripleU16) {}
#[no_mangle]
pub unsafe extern "C" fn f_triple_u32_arg(_a: TripleU32) {}
// CHECK: .visible .func f_double_i32_arg(
// CHECK: .param .align 4 .b8 f_double_i32_arg_param_0[8]
#[no_mangle]
pub unsafe extern "C" fn f_double_i32_arg(_a: DoubleI32) {}
// CHECK: .visible .func f_triple_u64_arg(
// CHECK: .param .align 8 .b8 f_triple_u64_arg_param_0[24]
#[no_mangle]

View file

@ -1,7 +1,6 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_86 -Z unstable-options -Clinker-flavor=llbc
//@ only-nvptx64
//@ ignore-nvptx64
// The PTX ABI stability is tied to major versions of the PTX ISA
// These tests assume major version 7
@ -41,6 +40,11 @@ pub struct TripleU16 {
h: u16,
}
#[repr(C)]
pub struct DoubleI32 {
f: i32,
g: i32,
}
#[repr(C)]
pub struct TripleU32 {
f: u32,
g: u32,
@ -187,6 +191,12 @@ pub unsafe extern "C" fn f_triple_u16_ret() -> TripleU16 {
TripleU16 { f: 18, g: 19, h: 20 }
}
// CHECK: .visible .func (.param .align 4 .b8 func_retval0[8]) f_double_i32_ret(
#[no_mangle]
pub unsafe extern "C" fn f_double_i32_ret() -> DoubleI32 {
DoubleI32 { f: 1, g: 2 }
}
// CHECK: .visible .func (.param .align 4 .b8 func_retval0[12]) f_triple_u32_ret(
#[no_mangle]
pub unsafe extern "C" fn f_triple_u32_ret() -> TripleU32 {

View file

@ -50,6 +50,11 @@ pub struct TripleU16 {
h: u16,
}
#[repr(C)]
pub struct DoubleI32 {
f: i32,
g: i32,
}
#[repr(C)]
pub struct TripleU32 {
f: u32,
g: u32,
@ -180,6 +185,11 @@ pub unsafe extern "ptx-kernel" fn f_triple_u8_arg(_a: TripleU8) {}
#[no_mangle]
pub unsafe extern "ptx-kernel" fn f_triple_u16_arg(_a: TripleU16) {}
// CHECK: .visible .entry f_double_i32_arg(
// CHECK: .param .align 4 .b8 f_double_i32_arg_param_0[8]
#[no_mangle]
pub unsafe extern "ptx-kernel" fn f_double_i32_arg(_a: DoubleI32) {}
// CHECK: .visible .entry f_triple_u32_arg(
// CHECK: .param .align 4 .b8 f_triple_u32_arg_param_0[12]
#[no_mangle]

View file

@ -55,13 +55,9 @@
//@ revisions: csky
//@[csky] compile-flags: --target csky-unknown-linux-gnuabiv2
//@[csky] needs-llvm-components: csky
// FIXME: disabled on nvptx64 since the target ABI fails the sanity check
// see https://github.com/rust-lang/rust/issues/117480
/* revisions: nvptx64
[nvptx64] compile-flags: --target nvptx64-nvidia-cuda
[nvptx64] needs-llvm-components: nvptx
*/
//@ revisions: nvptx64
//@[nvptx64] compile-flags: --target nvptx64-nvidia-cuda
//@[nvptx64] needs-llvm-components: nvptx
#![feature(rustc_attrs, unsized_fn_params, transparent_unions)]
#![cfg_attr(not(host), feature(no_core, lang_items), no_std, no_core)]
#![allow(unused, improper_ctypes_definitions, internal_features)]