Auto merge of #34412 - gnzlbg:document_platform_intrinsics_generate, r=alexcrichton

Add x86 intrinsics for bit manipulation (BMI 1.0, BMI 2.0, and TBM).

This PR adds the LLVM x86 intrinsics for the bit manipulation instruction sets (BMI 1.0, BMI 2.0, and TBM).

The objective of this pull request is to allow building a library that implements all the algorithms offered by those instruction sets, using compiler intrinsics on the targets that support them (selected by means of `target_feature`).

The target features added are:

- `bmi`: Bit Manipulation Instruction Set 1.0, available on Intel CPUs since Haswell and on AMD CPUs since Jaguar/Piledriver,
- `bmi2`: Bit Manipulation Instruction Set 2.0, available on Intel CPUs since Haswell and on AMD CPUs since Excavator,
- `tbm`: Trailing Bit Manipulation, available only on AMD's Piledriver (it won't be available in newer CPUs).

The intrinsics added are:

- BMI 1.0:
  - `bextr`: Bit field extract (with register).
- BMI 2.0:
  - `bzhi`: Zero high bits starting with specified bit position.
  - `pdep`: Parallel bits deposit.
  - `pext`: Parallel bits extract.
- TBM:
  - `bextri`: Bit field extract (with immediate).
This commit is contained in:
bors 2016-07-05 08:25:37 -07:00 committed by GitHub
commit ec58d0c997
18 changed files with 1044 additions and 894 deletions

View file

@ -1,6 +1,6 @@
{ {
"platform": "aarch64", "platform": "aarch64_v",
"intrinsic_prefix": "aarch64_v", "intrinsic_prefix": "",
"llvm_prefix": "llvm.aarch64.neon.", "llvm_prefix": "llvm.aarch64.neon.",
"number_info": { "number_info": {
"signed": { "signed": {

View file

@ -1,6 +1,6 @@
{ {
"platform": "arm", "platform": "arm_v",
"intrinsic_prefix": "arm_v", "intrinsic_prefix": "",
"llvm_prefix": "llvm.neon.v", "llvm_prefix": "llvm.neon.v",
"number_info": { "number_info": {
"signed": { "signed": {

View file

@ -26,10 +26,9 @@ SPEC = re.compile(
class PlatformInfo(object): class PlatformInfo(object):
def __init__(self, json): def __init__(self, json):
self._platform = json['platform'] self._platform = json['platform']
self._intrinsic_prefix = json['intrinsic_prefix']
def intrinsic_prefix(self): def platform_prefix(self):
return self._intrinsic_prefix return self._platform
class IntrinsicSet(object): class IntrinsicSet(object):
def __init__(self, platform, json): def __init__(self, platform, json):
@ -38,6 +37,7 @@ class IntrinsicSet(object):
self._intrinsics = json['intrinsics'] self._intrinsics = json['intrinsics']
self._widths = json['width_info'] self._widths = json['width_info']
self._platform = platform self._platform = platform
self._intrinsic_prefix = json['intrinsic_prefix']
def intrinsics(self): def intrinsics(self):
for raw in self._intrinsics: for raw in self._intrinsics:
@ -48,6 +48,9 @@ class IntrinsicSet(object):
def platform(self): def platform(self):
return self._platform return self._platform
def intrinsic_prefix(self):
return self._intrinsic_prefix
def llvm_prefix(self): def llvm_prefix(self):
return self._llvm_prefix return self._llvm_prefix
@ -538,8 +541,14 @@ class MonomorphicIntrinsic(object):
*self._args, *self._args,
width = self._width) width = self._width)
def platform_prefix(self):
return self._platform.platform().platform_prefix()
def intrinsic_set_name(self):
return self._platform.intrinsic_prefix()
def intrinsic_name(self): def intrinsic_name(self):
return self._platform.platform().intrinsic_prefix() + self.intrinsic_suffix() return self._platform.intrinsic_prefix() + self.intrinsic_suffix()
def compiler_args(self): def compiler_args(self):
return ', '.join(arg.compiler_ctor_ref() for arg in self._args_raw) return ', '.join(arg.compiler_ctor_ref() for arg in self._args_raw)
@ -561,6 +570,27 @@ def parse_args():
formatter_class = argparse.RawDescriptionHelpFormatter, formatter_class = argparse.RawDescriptionHelpFormatter,
description = 'Render an intrinsic definition JSON to various formats.', description = 'Render an intrinsic definition JSON to various formats.',
epilog = textwrap.dedent('''\ epilog = textwrap.dedent('''\
Quick How-To:
There are two operating modes: single file and multiple files.
For example, ARM is specified as a single file. To generate the
compiler-definitions for ARM just pass the script the "arm.json" file:
python generator.py --format compiler-defs arm.json
The X86 architecture is specified as multiple files (for the different
instruction sets that x86 supports). To generate the compiler
definitions one needs to pass the script a "platform information file"
(with the -i flag) next to the files of the different instruction sets.
For example, to generate the X86 compiler-definitions for SSE4.2, just:
python generator.py --format compiler-defs -i x86/info.json sse42.json
And to generate the compiler-definitions for SSE4.1 and SSE4.2, just:
python generator.py --format compiler-defs -i x86/info.json sse41.json sse42.json
An intrinsic definition consists of a map with fields: An intrinsic definition consists of a map with fields:
- intrinsic: pattern for the name(s) of the vendor's C intrinsic(s) - intrinsic: pattern for the name(s) of the vendor's C intrinsic(s)
- llvm: pattern for the name(s) of the internal llvm intrinsic(s) - llvm: pattern for the name(s) of the internal llvm intrinsic(s)
@ -730,8 +760,9 @@ class ExternBlock(object):
return 'extern "platform-intrinsic" {' return 'extern "platform-intrinsic" {'
def render(self, mono): def render(self, mono):
return ' fn {}{};'.format(mono.intrinsic_name(), return ' fn {}{}{};'.format(mono.platform_prefix(),
mono.intrinsic_signature()) mono.intrinsic_name(),
mono.intrinsic_signature())
def close(self): def close(self):
return '}' return '}'
@ -765,7 +796,7 @@ use IntrinsicDef::Named;
#[inline(never)] #[inline(never)]
pub fn find(name: &str) -> Option<Intrinsic> {{ pub fn find(name: &str) -> Option<Intrinsic> {{
if !name.starts_with("{0}") {{ return None }} if !name.starts_with("{0}") {{ return None }}
Some(match &name["{0}".len()..] {{'''.format(platform.intrinsic_prefix()) Some(match &name["{0}".len()..] {{'''.format(platform.platform_prefix())
def render(self, mono): def render(self, mono):
return '''\ return '''\
@ -773,7 +804,7 @@ pub fn find(name: &str) -> Option<Intrinsic> {{
inputs: {{ static INPUTS: [&'static Type; {}] = [{}]; &INPUTS }}, inputs: {{ static INPUTS: [&'static Type; {}] = [{}]; &INPUTS }},
output: {}, output: {},
definition: Named("{}") definition: Named("{}")
}},'''.format(mono.intrinsic_suffix(), }},'''.format(mono.intrinsic_set_name() + mono.intrinsic_suffix(),
len(mono._args_raw), len(mono._args_raw),
mono.compiler_args(), mono.compiler_args(),
mono.compiler_ret(), mono.compiler_ret(),

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.avx.", "llvm_prefix": "llvm.x86.avx.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.avx2.", "llvm_prefix": "llvm.x86.avx2.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -0,0 +1,13 @@
{
"intrinsic_prefix": "_bmi",
"llvm_prefix": "llvm.x86.bmi.",
"intrinsics": [
{
"intrinsic": "_bextr_{0.bitwidth}",
"width": ["0"],
"llvm": "bextr.{0.bitwidth}",
"ret": "S(32-64)u",
"args": ["0", "0"]
}
]
}

View file

@ -0,0 +1,27 @@
{
"intrinsic_prefix": "_bmi2",
"llvm_prefix": "llvm.x86.bmi.",
"intrinsics": [
{
"intrinsic": "_bzhi_{0.bitwidth}",
"width": ["0"],
"llvm": "bzhi.{0.bitwidth}",
"ret": "S(32-64)u",
"args": ["0", "0"]
},
{
"intrinsic": "_pdep_{0.bitwidth}",
"width": ["0"],
"llvm": "pdep.{0.bitwidth}",
"ret": "S(32-64)u",
"args": ["0", "0"]
},
{
"intrinsic": "_pext_{0.bitwidth}",
"width": ["0"],
"llvm": "pext.{0.bitwidth}",
"ret": "S(32-64)u",
"args": ["0", "0"]
}
]
}

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.fma.", "llvm_prefix": "llvm.x86.fma.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,26 +1,30 @@
{ {
"platform": "x86", "platform": "x86",
"intrinsic_prefix": "x86_mm",
"number_info": { "number_info": {
"signed": { "signed": {
"kind": "s", "kind": "s",
"kind_short": "", "kind_short": "",
"data_type": { "pattern": "epi{bitwidth}" }, "data_type": { "pattern": "epi{bitwidth}" },
"bitwidth": { "pattern": "{bitwidth}" },
"data_type_short": { "8": "b", "16": "w", "32": "d", "64": "q" } "data_type_short": { "8": "b", "16": "w", "32": "d", "64": "q" }
}, },
"unsigned": { "unsigned": {
"kind": "u", "kind": "u",
"kind_short": "u", "kind_short": "u",
"data_type": { "pattern": "epu{bitwidth}" }, "data_type": { "pattern": "epu{bitwidth}" },
"bitwidth": { "pattern": "{bitwidth}" },
"data_type_short": { "8": "b", "16": "w", "32": "d", "64": "q" } "data_type_short": { "8": "b", "16": "w", "32": "d", "64": "q" }
}, },
"float": { "float": {
"kind": "f", "kind": "f",
"data_type": { "32": "ps", "64": "pd" }, "data_type": { "32": "ps", "64": "pd" },
"bitwidth": { "pattern": "{bitwidth}" },
"data_type_short": { "32": "ps", "64": "pd" } "data_type_short": { "32": "ps", "64": "pd" }
} }
}, },
"width_info": { "width_info": {
"32": { "width_mm": "32", "width_suffix": "" },
"64": { "width_mm": "64", "width_suffix": "" },
"128": { "width_mm": "", "width_suffix": "" }, "128": { "width_mm": "", "width_suffix": "" },
"256": { "width_mm": "256", "width_suffix": ".256" }, "256": { "width_mm": "256", "width_suffix": ".256" },
"512": { "width_mm": "512", "width_suffix": ".512" } "512": { "width_mm": "512", "width_suffix": ".512" }

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.sse.", "llvm_prefix": "llvm.x86.sse.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.sse2.", "llvm_prefix": "llvm.x86.sse2.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.sse3.", "llvm_prefix": "llvm.x86.sse3.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.sse41.", "llvm_prefix": "llvm.x86.sse41.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.sse42.", "llvm_prefix": "llvm.x86.sse42.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -1,4 +1,5 @@
{ {
"intrinsic_prefix": "_mm",
"llvm_prefix": "llvm.x86.ssse3.", "llvm_prefix": "llvm.x86.ssse3.",
"intrinsics": [ "intrinsics": [
{ {

View file

@ -0,0 +1,13 @@
{
"intrinsic_prefix": "_tbm",
"llvm_prefix": "llvm.x86.tbm.",
"intrinsics": [
{
"intrinsic": "_bextri_u{0.bitwidth}",
"width": ["0"],
"llvm": "bextri.u{0.bitwidth}",
"ret": "S(32-64)u",
"args": ["0", "0"]
}
]
}

View file

@ -30,12 +30,15 @@ const ARM_WHITELIST: &'static [&'static str] = &[
const X86_WHITELIST: &'static [&'static str] = &[ const X86_WHITELIST: &'static [&'static str] = &[
"avx\0", "avx\0",
"avx2\0", "avx2\0",
"bmi\0",
"bmi2\0",
"sse\0", "sse\0",
"sse2\0", "sse2\0",
"sse3\0", "sse3\0",
"sse4.1\0", "sse4.1\0",
"sse4.2\0", "sse4.2\0",
"ssse3\0", "ssse3\0",
"tbm\0",
]; ];
/// Add `target_feature = "..."` cfgs for a variety of platform /// Add `target_feature = "..."` cfgs for a variety of platform

File diff suppressed because it is too large Load diff