PTX support

- `--emit=asm --target=nvptx64-nvidia-cuda` can be used to turn a crate
  into a PTX module (a `.s` file).

- intrinsics like `__syncthreads` and `blockIdx.x` are exposed as
  `"platform-intrinsics"`.

- "cabi" has been implemented for the nvptx and nvptx64 architectures.
  i.e. `extern "C"` works.

- a new ABI, `"ptx-kernel"`. That can be used to generate "global"
  functions. Example: `extern "ptx-kernel" fn kernel() { .. }`. All
  other functions are "device" functions.
This commit is contained in:
Jorge Aparicio 2016-12-22 16:24:29 -05:00
parent b7e5148bbd
commit 18d49288d5
15 changed files with 344 additions and 3 deletions

View file

@ -81,7 +81,7 @@ pub fn llvm(build: &Build, target: &str) {
.profile(profile)
.define("LLVM_ENABLE_ASSERTIONS", assertions)
.define("LLVM_TARGETS_TO_BUILD",
"X86;ARM;AArch64;Mips;PowerPC;SystemZ;JSBackend;MSP430;Sparc")
"X86;ARM;AArch64;Mips;PowerPC;SystemZ;JSBackend;MSP430;Sparc;NVPTX")
.define("LLVM_INCLUDE_EXAMPLES", "OFF")
.define("LLVM_INCLUDE_TESTS", "OFF")
.define("LLVM_INCLUDE_DOCS", "OFF")

View file

@ -0,0 +1,13 @@
{
"intrinsic_prefix": "_",
"llvm_prefix": "llvm.cuda.",
"intrinsics": [
{
"intrinsic": "syncthreads",
"width": ["0"],
"llvm": "syncthreads",
"ret": "V",
"args": []
}
]
}

View file

@ -0,0 +1,7 @@
{
"platform": "nvptx",
"number_info": {
"signed": {}
},
"width_info": {}
}

View file

@ -0,0 +1,90 @@
{
"intrinsic_prefix": "_",
"llvm_prefix": "llvm.nvvm.read.ptx.sreg.",
"intrinsics": [
{
"intrinsic": "block_dim_x",
"width": ["0"],
"llvm": "ntid.x",
"ret": "S32",
"args": []
},
{
"intrinsic": "block_dim_y",
"width": ["0"],
"llvm": "ntid.y",
"ret": "S32",
"args": []
},
{
"intrinsic": "block_dim_z",
"width": ["0"],
"llvm": "ntid.z",
"ret": "S32",
"args": []
},
{
"intrinsic": "block_idx_x",
"width": ["0"],
"llvm": "ctaid.x",
"ret": "S32",
"args": []
},
{
"intrinsic": "block_idx_y",
"width": ["0"],
"llvm": "ctaid.y",
"ret": "S32",
"args": []
},
{
"intrinsic": "block_idx_z",
"width": ["0"],
"llvm": "ctaid.z",
"ret": "S32",
"args": []
},
{
"intrinsic": "grid_dim_x",
"width": ["0"],
"llvm": "nctaid.x",
"ret": "S32",
"args": []
},
{
"intrinsic": "grid_dim_y",
"width": ["0"],
"llvm": "nctaid.y",
"ret": "S32",
"args": []
},
{
"intrinsic": "grid_dim_z",
"width": ["0"],
"llvm": "nctaid.z",
"ret": "S32",
"args": []
},
{
"intrinsic": "thread_idx_x",
"width": ["0"],
"llvm": "tid.x",
"ret": "S32",
"args": []
},
{
"intrinsic": "thread_idx_y",
"width": ["0"],
"llvm": "tid.y",
"ret": "S32",
"args": []
},
{
"intrinsic": "thread_idx_z",
"width": ["0"],
"llvm": "tid.z",
"ret": "S32",
"args": []
}
]
}

View file

@ -96,7 +96,7 @@ fn main() {
let optional_components =
["x86", "arm", "aarch64", "mips", "powerpc", "pnacl", "systemz", "jsbackend", "msp430",
"sparc"];
"sparc", "nvptx"];
// FIXME: surely we don't need all these components, right? Stuff like mcjit
// or interpreter the compiler itself never uses.

View file

@ -42,6 +42,7 @@ pub enum CallConv {
X86StdcallCallConv = 64,
X86FastcallCallConv = 65,
ArmAapcsCallConv = 67,
PtxKernel = 71,
X86_64_SysV = 78,
X86_64_Win64 = 79,
X86_VectorCall = 80,

View file

@ -377,6 +377,11 @@ pub fn initialize_available_targets() {
LLVMInitializeSparcTargetMC,
LLVMInitializeSparcAsmPrinter,
LLVMInitializeSparcAsmParser);
init_target!(llvm_component = "nvptx",
LLVMInitializeNVPTXTargetInfo,
LLVMInitializeNVPTXTarget,
LLVMInitializeNVPTXTargetMC,
LLVMInitializeNVPTXAsmPrinter);
}
pub fn last_error() -> Option<String> {

View file

@ -95,6 +95,7 @@ static VOID: Type = Type::Void;
mod x86;
mod arm;
mod aarch64;
mod nvptx;
impl Intrinsic {
pub fn find(name: &str) -> Option<Intrinsic> {
@ -104,6 +105,8 @@ impl Intrinsic {
arm::find(name)
} else if name.starts_with("aarch64_") {
aarch64::find(name)
} else if name.starts_with("nvptx_") {
nvptx::find(name)
} else {
None
}

View file

@ -0,0 +1,92 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// DO NOT EDIT: autogenerated by etc/platform-intrinsics/generator.py
// ignore-tidy-linelength
#![allow(unused_imports)]
use {Intrinsic, Type};
use IntrinsicDef::Named;
// The default inlining settings trigger a pathological behaviour in
// LLVM, which causes makes compilation very slow. See #28273.
#[inline(never)]
pub fn find(name: &str) -> Option<Intrinsic> {
if !name.starts_with("nvptx") { return None }
Some(match &name["nvptx".len()..] {
"_syncthreads" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::VOID,
definition: Named("llvm.cuda.syncthreads")
},
"_block_dim_x" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.x")
},
"_block_dim_y" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.y")
},
"_block_dim_z" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.z")
},
"_block_idx_x" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.x")
},
"_block_idx_y" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.y")
},
"_block_idx_z" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.z")
},
"_grid_dim_x" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.x")
},
"_grid_dim_y" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.y")
},
"_grid_dim_z" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.z")
},
"_thread_idx_x" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.tid.x")
},
"_thread_idx_y" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.tid.y")
},
"_thread_idx_z" => Intrinsic {
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
output: &::I32,
definition: Named("llvm.nvvm.read.ptx.sreg.tid.z")
},
_ => return None,
})
}

View file

@ -25,6 +25,8 @@ use cabi_mips64;
use cabi_asmjs;
use cabi_msp430;
use cabi_sparc;
use cabi_nvptx;
use cabi_nvptx64;
use machine::{llalign_of_min, llsize_of, llsize_of_alloc};
use type_::Type;
use type_of;
@ -353,6 +355,7 @@ impl FnType {
Win64 => llvm::X86_64_Win64,
SysV64 => llvm::X86_64_SysV,
Aapcs => llvm::ArmAapcsCallConv,
PtxKernel => llvm::PtxKernel,
// These API constants ought to be more specific...
Cdecl => llvm::CCallConv,
@ -608,6 +611,8 @@ impl FnType {
"wasm32" => cabi_asmjs::compute_abi_info(ccx, self),
"msp430" => cabi_msp430::compute_abi_info(ccx, self),
"sparc" => cabi_sparc::compute_abi_info(ccx, self),
"nvptx" => cabi_nvptx::compute_abi_info(ccx, self),
"nvptx64" => cabi_nvptx64::compute_abi_info(ccx, self),
a => ccx.sess().fatal(&format!("unrecognized arch \"{}\" in target specification", a))
}

View file

@ -0,0 +1,53 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Reference: PTX Writer's Guide to Interoperability
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
#![allow(non_upper_case_globals)]
use llvm::Struct;
use abi::{self, ArgType, FnType};
use context::CrateContext;
use type_::Type;
fn ty_size(ty: Type) -> usize {
abi::ty_size(ty, 4)
}
fn classify_ret_ty(ccx: &CrateContext, ret: &mut ArgType) {
if ret.ty.kind() == Struct && ty_size(ret.ty) > 32 {
ret.make_indirect(ccx);
} else {
ret.extend_integer_width_to(32);
}
}
fn classify_arg_ty(ccx: &CrateContext, arg: &mut ArgType) {
if arg.ty.kind() == Struct && ty_size(arg.ty) > 32 {
arg.make_indirect(ccx);
} else {
arg.extend_integer_width_to(32);
}
}
pub fn compute_abi_info(ccx: &CrateContext, fty: &mut FnType) {
if !fty.ret.is_ignore() {
classify_ret_ty(ccx, &mut fty.ret);
}
for arg in &mut fty.args {
if arg.is_ignore() {
continue;
}
classify_arg_ty(ccx, arg);
}
}

View file

@ -0,0 +1,53 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Reference: PTX Writer's Guide to Interoperability
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
#![allow(non_upper_case_globals)]
use llvm::Struct;
use abi::{self, ArgType, FnType};
use context::CrateContext;
use type_::Type;
fn ty_size(ty: Type) -> usize {
abi::ty_size(ty, 8)
}
fn classify_ret_ty(ccx: &CrateContext, ret: &mut ArgType) {
if ret.ty.kind() == Struct && ty_size(ret.ty) > 64 {
ret.make_indirect(ccx);
} else {
ret.extend_integer_width_to(64);
}
}
fn classify_arg_ty(ccx: &CrateContext, arg: &mut ArgType) {
if arg.ty.kind() == Struct && ty_size(arg.ty) > 64 {
arg.make_indirect(ccx);
} else {
arg.extend_integer_width_to(64);
}
}
pub fn compute_abi_info(ccx: &CrateContext, fty: &mut FnType) {
if !fty.ret.is_ignore() {
classify_ret_ty(ccx, &mut fty.ret);
}
for arg in &mut fty.args {
if arg.is_ignore() {
continue;
}
classify_arg_ty(ccx, arg);
}
}

View file

@ -103,6 +103,8 @@ mod cabi_asmjs;
mod cabi_mips;
mod cabi_mips64;
mod cabi_msp430;
mod cabi_nvptx;
mod cabi_nvptx64;
mod cabi_powerpc;
mod cabi_powerpc64;
mod cabi_s390x;

View file

@ -41,6 +41,7 @@ pub enum Abi {
Aapcs,
Win64,
SysV64,
PtxKernel,
// Multiplatform / generic ABIs
Rust,
@ -82,6 +83,7 @@ const AbiDatas: &'static [AbiData] = &[
AbiData {abi: Abi::Aapcs, name: "aapcs", generic: false },
AbiData {abi: Abi::Win64, name: "win64", generic: false },
AbiData {abi: Abi::SysV64, name: "sysv64", generic: false },
AbiData {abi: Abi::PtxKernel, name: "ptx-kernel", generic: false },
// Cross-platform ABIs
AbiData {abi: Abi::Rust, name: "Rust", generic: true },

View file

@ -318,6 +318,9 @@ declare_features! (
// Allow safe suggestions for potential type conversions.
(active, safe_suggestion, "1.0.0", Some(37384)),
// `extern "ptx-*" fn()`
(active, abi_ptx, "1.15.0", None),
);
declare_features! (
@ -986,7 +989,19 @@ impl<'a> PostExpansionVisitor<'a> {
gate_feature_post!(&self, abi_sysv64, span,
"sysv64 ABI is experimental and subject to change");
},
_ => {}
Abi::PtxKernel => {
gate_feature_post!(&self, abi_ptx, span,
"PTX ABIs are experimental and subject to change");
}
// Stable
Abi::Cdecl |
Abi::Stdcall |
Abi::Fastcall |
Abi::Aapcs |
Abi::Win64 |
Abi::Rust |
Abi::C |
Abi::System => {}
}
}
}