PTX support
- `--emit=asm --target=nvptx64-nvidia-cuda` can be used to turn a crate into a PTX module (a `.s` file). - intrinsics like `__syncthreads` and `blockIdx.x` are exposed as `"platform-intrinsics"`. - "cabi" has been implemented for the nvptx and nvptx64 architectures. i.e. `extern "C"` works. - a new ABI, `"ptx-kernel"`. That can be used to generate "global" functions. Example: `extern "ptx-kernel" fn kernel() { .. }`. All other functions are "device" functions.
This commit is contained in:
parent
b7e5148bbd
commit
18d49288d5
15 changed files with 344 additions and 3 deletions
|
@ -81,7 +81,7 @@ pub fn llvm(build: &Build, target: &str) {
|
|||
.profile(profile)
|
||||
.define("LLVM_ENABLE_ASSERTIONS", assertions)
|
||||
.define("LLVM_TARGETS_TO_BUILD",
|
||||
"X86;ARM;AArch64;Mips;PowerPC;SystemZ;JSBackend;MSP430;Sparc")
|
||||
"X86;ARM;AArch64;Mips;PowerPC;SystemZ;JSBackend;MSP430;Sparc;NVPTX")
|
||||
.define("LLVM_INCLUDE_EXAMPLES", "OFF")
|
||||
.define("LLVM_INCLUDE_TESTS", "OFF")
|
||||
.define("LLVM_INCLUDE_DOCS", "OFF")
|
||||
|
|
13
src/etc/platform-intrinsics/nvptx/cuda.json
Normal file
13
src/etc/platform-intrinsics/nvptx/cuda.json
Normal file
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"intrinsic_prefix": "_",
|
||||
"llvm_prefix": "llvm.cuda.",
|
||||
"intrinsics": [
|
||||
{
|
||||
"intrinsic": "syncthreads",
|
||||
"width": ["0"],
|
||||
"llvm": "syncthreads",
|
||||
"ret": "V",
|
||||
"args": []
|
||||
}
|
||||
]
|
||||
}
|
7
src/etc/platform-intrinsics/nvptx/info.json
Normal file
7
src/etc/platform-intrinsics/nvptx/info.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"platform": "nvptx",
|
||||
"number_info": {
|
||||
"signed": {}
|
||||
},
|
||||
"width_info": {}
|
||||
}
|
90
src/etc/platform-intrinsics/nvptx/sreg.json
Normal file
90
src/etc/platform-intrinsics/nvptx/sreg.json
Normal file
|
@ -0,0 +1,90 @@
|
|||
{
|
||||
"intrinsic_prefix": "_",
|
||||
"llvm_prefix": "llvm.nvvm.read.ptx.sreg.",
|
||||
"intrinsics": [
|
||||
{
|
||||
"intrinsic": "block_dim_x",
|
||||
"width": ["0"],
|
||||
"llvm": "ntid.x",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "block_dim_y",
|
||||
"width": ["0"],
|
||||
"llvm": "ntid.y",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "block_dim_z",
|
||||
"width": ["0"],
|
||||
"llvm": "ntid.z",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "block_idx_x",
|
||||
"width": ["0"],
|
||||
"llvm": "ctaid.x",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "block_idx_y",
|
||||
"width": ["0"],
|
||||
"llvm": "ctaid.y",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "block_idx_z",
|
||||
"width": ["0"],
|
||||
"llvm": "ctaid.z",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "grid_dim_x",
|
||||
"width": ["0"],
|
||||
"llvm": "nctaid.x",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "grid_dim_y",
|
||||
"width": ["0"],
|
||||
"llvm": "nctaid.y",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "grid_dim_z",
|
||||
"width": ["0"],
|
||||
"llvm": "nctaid.z",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "thread_idx_x",
|
||||
"width": ["0"],
|
||||
"llvm": "tid.x",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "thread_idx_y",
|
||||
"width": ["0"],
|
||||
"llvm": "tid.y",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "thread_idx_z",
|
||||
"width": ["0"],
|
||||
"llvm": "tid.z",
|
||||
"ret": "S32",
|
||||
"args": []
|
||||
}
|
||||
]
|
||||
}
|
|
@ -96,7 +96,7 @@ fn main() {
|
|||
|
||||
let optional_components =
|
||||
["x86", "arm", "aarch64", "mips", "powerpc", "pnacl", "systemz", "jsbackend", "msp430",
|
||||
"sparc"];
|
||||
"sparc", "nvptx"];
|
||||
|
||||
// FIXME: surely we don't need all these components, right? Stuff like mcjit
|
||||
// or interpreter the compiler itself never uses.
|
||||
|
|
|
@ -42,6 +42,7 @@ pub enum CallConv {
|
|||
X86StdcallCallConv = 64,
|
||||
X86FastcallCallConv = 65,
|
||||
ArmAapcsCallConv = 67,
|
||||
PtxKernel = 71,
|
||||
X86_64_SysV = 78,
|
||||
X86_64_Win64 = 79,
|
||||
X86_VectorCall = 80,
|
||||
|
|
|
@ -377,6 +377,11 @@ pub fn initialize_available_targets() {
|
|||
LLVMInitializeSparcTargetMC,
|
||||
LLVMInitializeSparcAsmPrinter,
|
||||
LLVMInitializeSparcAsmParser);
|
||||
init_target!(llvm_component = "nvptx",
|
||||
LLVMInitializeNVPTXTargetInfo,
|
||||
LLVMInitializeNVPTXTarget,
|
||||
LLVMInitializeNVPTXTargetMC,
|
||||
LLVMInitializeNVPTXAsmPrinter);
|
||||
}
|
||||
|
||||
pub fn last_error() -> Option<String> {
|
||||
|
|
|
@ -95,6 +95,7 @@ static VOID: Type = Type::Void;
|
|||
mod x86;
|
||||
mod arm;
|
||||
mod aarch64;
|
||||
mod nvptx;
|
||||
|
||||
impl Intrinsic {
|
||||
pub fn find(name: &str) -> Option<Intrinsic> {
|
||||
|
@ -104,6 +105,8 @@ impl Intrinsic {
|
|||
arm::find(name)
|
||||
} else if name.starts_with("aarch64_") {
|
||||
aarch64::find(name)
|
||||
} else if name.starts_with("nvptx_") {
|
||||
nvptx::find(name)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
92
src/librustc_platform_intrinsics/nvptx.rs
Normal file
92
src/librustc_platform_intrinsics/nvptx.rs
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// DO NOT EDIT: autogenerated by etc/platform-intrinsics/generator.py
|
||||
// ignore-tidy-linelength
|
||||
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use {Intrinsic, Type};
|
||||
use IntrinsicDef::Named;
|
||||
|
||||
// The default inlining settings trigger a pathological behaviour in
|
||||
// LLVM, which causes makes compilation very slow. See #28273.
|
||||
#[inline(never)]
|
||||
pub fn find(name: &str) -> Option<Intrinsic> {
|
||||
if !name.starts_with("nvptx") { return None }
|
||||
Some(match &name["nvptx".len()..] {
|
||||
"_syncthreads" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::VOID,
|
||||
definition: Named("llvm.cuda.syncthreads")
|
||||
},
|
||||
"_block_dim_x" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.x")
|
||||
},
|
||||
"_block_dim_y" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.y")
|
||||
},
|
||||
"_block_dim_z" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.z")
|
||||
},
|
||||
"_block_idx_x" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.x")
|
||||
},
|
||||
"_block_idx_y" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.y")
|
||||
},
|
||||
"_block_idx_z" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.z")
|
||||
},
|
||||
"_grid_dim_x" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.x")
|
||||
},
|
||||
"_grid_dim_y" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.y")
|
||||
},
|
||||
"_grid_dim_z" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.z")
|
||||
},
|
||||
"_thread_idx_x" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.tid.x")
|
||||
},
|
||||
"_thread_idx_y" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.tid.y")
|
||||
},
|
||||
"_thread_idx_z" => Intrinsic {
|
||||
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
|
||||
output: &::I32,
|
||||
definition: Named("llvm.nvvm.read.ptx.sreg.tid.z")
|
||||
},
|
||||
_ => return None,
|
||||
})
|
||||
}
|
|
@ -25,6 +25,8 @@ use cabi_mips64;
|
|||
use cabi_asmjs;
|
||||
use cabi_msp430;
|
||||
use cabi_sparc;
|
||||
use cabi_nvptx;
|
||||
use cabi_nvptx64;
|
||||
use machine::{llalign_of_min, llsize_of, llsize_of_alloc};
|
||||
use type_::Type;
|
||||
use type_of;
|
||||
|
@ -353,6 +355,7 @@ impl FnType {
|
|||
Win64 => llvm::X86_64_Win64,
|
||||
SysV64 => llvm::X86_64_SysV,
|
||||
Aapcs => llvm::ArmAapcsCallConv,
|
||||
PtxKernel => llvm::PtxKernel,
|
||||
|
||||
// These API constants ought to be more specific...
|
||||
Cdecl => llvm::CCallConv,
|
||||
|
@ -608,6 +611,8 @@ impl FnType {
|
|||
"wasm32" => cabi_asmjs::compute_abi_info(ccx, self),
|
||||
"msp430" => cabi_msp430::compute_abi_info(ccx, self),
|
||||
"sparc" => cabi_sparc::compute_abi_info(ccx, self),
|
||||
"nvptx" => cabi_nvptx::compute_abi_info(ccx, self),
|
||||
"nvptx64" => cabi_nvptx64::compute_abi_info(ccx, self),
|
||||
a => ccx.sess().fatal(&format!("unrecognized arch \"{}\" in target specification", a))
|
||||
}
|
||||
|
||||
|
|
53
src/librustc_trans/cabi_nvptx.rs
Normal file
53
src/librustc_trans/cabi_nvptx.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Reference: PTX Writer's Guide to Interoperability
|
||||
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
|
||||
|
||||
#![allow(non_upper_case_globals)]
|
||||
|
||||
use llvm::Struct;
|
||||
|
||||
use abi::{self, ArgType, FnType};
|
||||
use context::CrateContext;
|
||||
use type_::Type;
|
||||
|
||||
fn ty_size(ty: Type) -> usize {
|
||||
abi::ty_size(ty, 4)
|
||||
}
|
||||
|
||||
fn classify_ret_ty(ccx: &CrateContext, ret: &mut ArgType) {
|
||||
if ret.ty.kind() == Struct && ty_size(ret.ty) > 32 {
|
||||
ret.make_indirect(ccx);
|
||||
} else {
|
||||
ret.extend_integer_width_to(32);
|
||||
}
|
||||
}
|
||||
|
||||
fn classify_arg_ty(ccx: &CrateContext, arg: &mut ArgType) {
|
||||
if arg.ty.kind() == Struct && ty_size(arg.ty) > 32 {
|
||||
arg.make_indirect(ccx);
|
||||
} else {
|
||||
arg.extend_integer_width_to(32);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_abi_info(ccx: &CrateContext, fty: &mut FnType) {
|
||||
if !fty.ret.is_ignore() {
|
||||
classify_ret_ty(ccx, &mut fty.ret);
|
||||
}
|
||||
|
||||
for arg in &mut fty.args {
|
||||
if arg.is_ignore() {
|
||||
continue;
|
||||
}
|
||||
classify_arg_ty(ccx, arg);
|
||||
}
|
||||
}
|
53
src/librustc_trans/cabi_nvptx64.rs
Normal file
53
src/librustc_trans/cabi_nvptx64.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// Reference: PTX Writer's Guide to Interoperability
|
||||
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
|
||||
|
||||
#![allow(non_upper_case_globals)]
|
||||
|
||||
use llvm::Struct;
|
||||
|
||||
use abi::{self, ArgType, FnType};
|
||||
use context::CrateContext;
|
||||
use type_::Type;
|
||||
|
||||
fn ty_size(ty: Type) -> usize {
|
||||
abi::ty_size(ty, 8)
|
||||
}
|
||||
|
||||
fn classify_ret_ty(ccx: &CrateContext, ret: &mut ArgType) {
|
||||
if ret.ty.kind() == Struct && ty_size(ret.ty) > 64 {
|
||||
ret.make_indirect(ccx);
|
||||
} else {
|
||||
ret.extend_integer_width_to(64);
|
||||
}
|
||||
}
|
||||
|
||||
fn classify_arg_ty(ccx: &CrateContext, arg: &mut ArgType) {
|
||||
if arg.ty.kind() == Struct && ty_size(arg.ty) > 64 {
|
||||
arg.make_indirect(ccx);
|
||||
} else {
|
||||
arg.extend_integer_width_to(64);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_abi_info(ccx: &CrateContext, fty: &mut FnType) {
|
||||
if !fty.ret.is_ignore() {
|
||||
classify_ret_ty(ccx, &mut fty.ret);
|
||||
}
|
||||
|
||||
for arg in &mut fty.args {
|
||||
if arg.is_ignore() {
|
||||
continue;
|
||||
}
|
||||
classify_arg_ty(ccx, arg);
|
||||
}
|
||||
}
|
|
@ -103,6 +103,8 @@ mod cabi_asmjs;
|
|||
mod cabi_mips;
|
||||
mod cabi_mips64;
|
||||
mod cabi_msp430;
|
||||
mod cabi_nvptx;
|
||||
mod cabi_nvptx64;
|
||||
mod cabi_powerpc;
|
||||
mod cabi_powerpc64;
|
||||
mod cabi_s390x;
|
||||
|
|
|
@ -41,6 +41,7 @@ pub enum Abi {
|
|||
Aapcs,
|
||||
Win64,
|
||||
SysV64,
|
||||
PtxKernel,
|
||||
|
||||
// Multiplatform / generic ABIs
|
||||
Rust,
|
||||
|
@ -82,6 +83,7 @@ const AbiDatas: &'static [AbiData] = &[
|
|||
AbiData {abi: Abi::Aapcs, name: "aapcs", generic: false },
|
||||
AbiData {abi: Abi::Win64, name: "win64", generic: false },
|
||||
AbiData {abi: Abi::SysV64, name: "sysv64", generic: false },
|
||||
AbiData {abi: Abi::PtxKernel, name: "ptx-kernel", generic: false },
|
||||
|
||||
// Cross-platform ABIs
|
||||
AbiData {abi: Abi::Rust, name: "Rust", generic: true },
|
||||
|
|
|
@ -318,6 +318,9 @@ declare_features! (
|
|||
|
||||
// Allow safe suggestions for potential type conversions.
|
||||
(active, safe_suggestion, "1.0.0", Some(37384)),
|
||||
|
||||
// `extern "ptx-*" fn()`
|
||||
(active, abi_ptx, "1.15.0", None),
|
||||
);
|
||||
|
||||
declare_features! (
|
||||
|
@ -986,7 +989,19 @@ impl<'a> PostExpansionVisitor<'a> {
|
|||
gate_feature_post!(&self, abi_sysv64, span,
|
||||
"sysv64 ABI is experimental and subject to change");
|
||||
},
|
||||
_ => {}
|
||||
Abi::PtxKernel => {
|
||||
gate_feature_post!(&self, abi_ptx, span,
|
||||
"PTX ABIs are experimental and subject to change");
|
||||
}
|
||||
// Stable
|
||||
Abi::Cdecl |
|
||||
Abi::Stdcall |
|
||||
Abi::Fastcall |
|
||||
Abi::Aapcs |
|
||||
Abi::Win64 |
|
||||
Abi::Rust |
|
||||
Abi::C |
|
||||
Abi::System => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue