Merge commit '649110751ef4f27440d7cc711b3e07d11bf02d4a' into sync-portable-simd-2024-02-18
This commit is contained in:
commit
b2691baa90
22 changed files with 218 additions and 354 deletions
|
@ -1,169 +0,0 @@
|
|||
//! This module contains the LLVM intrinsics bindings that provide the functionality for this
|
||||
//! crate.
|
||||
//!
|
||||
//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
|
||||
//!
|
||||
//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
|
||||
//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
|
||||
//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
|
||||
//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
|
||||
//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
|
||||
//! this means that division by poison or undef is like division by zero, which means it inflicts...
|
||||
//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
|
||||
//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
|
||||
//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
|
||||
//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
|
||||
//!
|
||||
//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
|
||||
//!
|
||||
//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
|
||||
|
||||
// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
|
||||
// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
|
||||
// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
|
||||
extern "platform-intrinsic" {
|
||||
/// add/fadd
|
||||
pub(crate) fn simd_add<T>(x: T, y: T) -> T;
|
||||
|
||||
/// sub/fsub
|
||||
pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// mul/fmul
|
||||
pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
|
||||
|
||||
/// udiv/sdiv/fdiv
|
||||
/// ints and uints: {s,u}div incur UB if division by zero occurs.
|
||||
/// ints: sdiv is UB for int::MIN / -1.
|
||||
/// floats: fdiv is never UB, but may create NaNs or infinities.
|
||||
pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// urem/srem/frem
|
||||
/// ints and uints: {s,u}rem incur UB if division by zero occurs.
|
||||
/// ints: srem is UB for int::MIN / -1.
|
||||
/// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
|
||||
pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// shl
|
||||
/// for (u)ints. poison if rhs >= lhs::BITS
|
||||
pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// ints: ashr
|
||||
/// uints: lshr
|
||||
/// poison if rhs >= lhs::BITS
|
||||
pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// and
|
||||
pub(crate) fn simd_and<T>(x: T, y: T) -> T;
|
||||
|
||||
/// or
|
||||
pub(crate) fn simd_or<T>(x: T, y: T) -> T;
|
||||
|
||||
/// xor
|
||||
pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
|
||||
|
||||
/// fptoui/fptosi/uitofp/sitofp
|
||||
/// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
|
||||
/// but the truncated value must fit in the target type or the result is poison.
|
||||
/// use `simd_as` instead for a cast that performs a saturating conversion.
|
||||
pub(crate) fn simd_cast<T, U>(x: T) -> U;
|
||||
/// follows Rust's `T as U` semantics, including saturating float casts
|
||||
/// which amounts to the same as `simd_cast` for many cases
|
||||
pub(crate) fn simd_as<T, U>(x: T) -> U;
|
||||
|
||||
/// neg/fneg
|
||||
/// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
|
||||
/// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
|
||||
/// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
|
||||
pub(crate) fn simd_neg<T>(x: T) -> T;
|
||||
|
||||
/// fabs
|
||||
pub(crate) fn simd_fabs<T>(x: T) -> T;
|
||||
|
||||
// minnum/maxnum
|
||||
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
|
||||
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
|
||||
|
||||
// these return Simd<int, N> with the same BITS size as the inputs
|
||||
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
|
||||
|
||||
// shufflevector
|
||||
// idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
|
||||
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
|
||||
|
||||
/// llvm.masked.gather
|
||||
/// like a loop of pointer reads
|
||||
/// val: vector of values to select if a lane is masked
|
||||
/// ptr: vector of pointers to read from
|
||||
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
|
||||
/// note, the LLVM intrinsic accepts a mask vector of `<N x i1>`
|
||||
/// FIXME: review this if/when we fix up our mask story in general?
|
||||
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
|
||||
/// llvm.masked.scatter
|
||||
/// like gather, but more spicy, as it writes instead of reads
|
||||
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
|
||||
|
||||
// {s,u}add.sat
|
||||
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
|
||||
|
||||
// {s,u}sub.sat
|
||||
pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
// reductions
|
||||
// llvm.vector.reduce.{add,fadd}
|
||||
pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
|
||||
// llvm.vector.reduce.{mul,fmul}
|
||||
pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
|
||||
pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
|
||||
pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
|
||||
pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
|
||||
pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
|
||||
pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
|
||||
|
||||
// truncate integer vector to bitmask
|
||||
// `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and
|
||||
// returns either an unsigned integer or array of `u8`.
|
||||
// Every element in the vector becomes a single bit in the returned bitmask.
|
||||
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
|
||||
// The bit order of the result depends on the byte endianness. LSB-first for little
|
||||
// endian and MSB-first for big endian.
|
||||
//
|
||||
// UB if called on a vector with values other than 0 and -1.
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
|
||||
|
||||
// select
|
||||
// first argument is a vector of integers, -1 (all bits 1) is "true"
|
||||
// logically equivalent to (yes & m) | (no & (m^-1),
|
||||
// but you can use it on floats.
|
||||
pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
|
||||
|
||||
/// getelementptr (without inbounds)
|
||||
/// equivalent to wrapping_offset
|
||||
pub(crate) fn simd_arith_offset<T, U>(ptr: T, offset: U) -> T;
|
||||
|
||||
/// equivalent to `T as U` semantics, specifically for pointers
|
||||
pub(crate) fn simd_cast_ptr<T, U>(ptr: T) -> U;
|
||||
|
||||
/// expose a pointer as an address
|
||||
pub(crate) fn simd_expose_addr<T, U>(ptr: T) -> U;
|
||||
|
||||
/// convert an exposed address back to a pointer
|
||||
pub(crate) fn simd_from_exposed_addr<T, U>(addr: T) -> U;
|
||||
|
||||
// Integer operations
|
||||
pub(crate) fn simd_bswap<T>(x: T) -> T;
|
||||
pub(crate) fn simd_bitreverse<T>(x: T) -> T;
|
||||
pub(crate) fn simd_ctlz<T>(x: T) -> T;
|
||||
pub(crate) fn simd_cttz<T>(x: T) -> T;
|
||||
}
|
|
@ -1,20 +1,38 @@
|
|||
#![no_std]
|
||||
#![feature(
|
||||
const_intrinsic_copy,
|
||||
const_refs_to_cell,
|
||||
const_maybe_uninit_as_mut_ptr,
|
||||
const_mut_refs,
|
||||
convert_float_to_int,
|
||||
core_intrinsics,
|
||||
decl_macro,
|
||||
inline_const,
|
||||
intra_doc_pointers,
|
||||
platform_intrinsics,
|
||||
repr_simd,
|
||||
simd_ffi,
|
||||
staged_api,
|
||||
stdsimd,
|
||||
strict_provenance,
|
||||
ptr_metadata
|
||||
)]
|
||||
#![cfg_attr(
|
||||
all(
|
||||
any(target_arch = "aarch64", target_arch = "arm",),
|
||||
any(
|
||||
all(target_feature = "v6", not(target_feature = "mclass")),
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
)
|
||||
),
|
||||
feature(stdarch_arm_dsp)
|
||||
)]
|
||||
#![cfg_attr(
|
||||
all(target_arch = "arm", target_feature = "v7"),
|
||||
feature(stdarch_arm_neon_intrinsics)
|
||||
)]
|
||||
#![cfg_attr(
|
||||
any(target_arch = "powerpc", target_arch = "powerpc64"),
|
||||
feature(stdarch_powerpc)
|
||||
)]
|
||||
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
|
||||
#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
|
||||
#![allow(internal_features)]
|
||||
|
|
|
@ -12,9 +12,7 @@
|
|||
)]
|
||||
mod mask_impl;
|
||||
|
||||
use crate::simd::{
|
||||
cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount,
|
||||
};
|
||||
use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
|
||||
use core::cmp::Ordering;
|
||||
use core::{fmt, mem};
|
||||
|
||||
|
@ -35,7 +33,7 @@ mod sealed {
|
|||
|
||||
fn eq(self, other: Self) -> bool;
|
||||
|
||||
fn as_usize(self) -> usize;
|
||||
fn to_usize(self) -> usize;
|
||||
|
||||
type Unsigned: SimdElement;
|
||||
|
||||
|
@ -60,14 +58,23 @@ macro_rules! impl_element {
|
|||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
(value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all()
|
||||
// We can't use `Simd` directly, because `Simd`'s functions call this function and
|
||||
// we will end up with an infinite loop.
|
||||
// Safety: `value` is an integer vector
|
||||
unsafe {
|
||||
use core::intrinsics::simd;
|
||||
let falses: Simd<Self, N> = simd::simd_eq(value, Simd::splat(0 as _));
|
||||
let trues: Simd<Self, N> = simd::simd_eq(value, Simd::splat(-1 as _));
|
||||
let valid: Simd<Self, N> = simd::simd_or(falses, trues);
|
||||
simd::simd_reduce_all(valid)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn eq(self, other: Self) -> bool { self == other }
|
||||
|
||||
#[inline]
|
||||
fn as_usize(self) -> usize {
|
||||
fn to_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
|
||||
|
@ -141,8 +148,9 @@ where
|
|||
// but these are "dependently-sized" types, so copy elision it is!
|
||||
unsafe {
|
||||
let bytes: [u8; N] = mem::transmute_copy(&array);
|
||||
let bools: Simd<i8, N> = intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
|
||||
Mask::from_int_unchecked(intrinsics::simd_cast(bools))
|
||||
let bools: Simd<i8, N> =
|
||||
core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
|
||||
Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -160,7 +168,7 @@ where
|
|||
// This would be hypothetically valid as an "in-place" transmute,
|
||||
// but these are "dependently-sized" types, so copy elision it is!
|
||||
unsafe {
|
||||
let mut bytes: Simd<i8, N> = intrinsics::simd_cast(self.to_int());
|
||||
let mut bytes: Simd<i8, N> = core::intrinsics::simd::simd_cast(self.to_int());
|
||||
bytes &= Simd::splat(1i8);
|
||||
mem::transmute_copy(&bytes)
|
||||
}
|
||||
|
@ -175,7 +183,10 @@ where
|
|||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
|
||||
// Safety: the caller must confirm this invariant
|
||||
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
|
||||
unsafe {
|
||||
core::intrinsics::assume(<T as Sealed>::valid(value));
|
||||
Self(mask_impl::Mask::from_int_unchecked(value))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a vector of integers to a mask, where 0 represents `false` and -1
|
||||
|
@ -374,15 +385,17 @@ where
|
|||
);
|
||||
|
||||
// Safety: the input and output are integer vectors
|
||||
let index: Simd<T, N> = unsafe { intrinsics::simd_cast(index) };
|
||||
let index: Simd<T, N> = unsafe { core::intrinsics::simd::simd_cast(index) };
|
||||
|
||||
let masked_index = self.select(index, Self::splat(true).to_int());
|
||||
|
||||
// Safety: the input and output are integer vectors
|
||||
let masked_index: Simd<T::Unsigned, N> = unsafe { intrinsics::simd_cast(masked_index) };
|
||||
let masked_index: Simd<T::Unsigned, N> =
|
||||
unsafe { core::intrinsics::simd::simd_cast(masked_index) };
|
||||
|
||||
// Safety: the input is an integer vector
|
||||
let min_index: T::Unsigned = unsafe { intrinsics::simd_reduce_min(masked_index) };
|
||||
let min_index: T::Unsigned =
|
||||
unsafe { core::intrinsics::simd::simd_reduce_min(masked_index) };
|
||||
|
||||
// Safety: the return value is the unsigned version of T
|
||||
let min_index: T = unsafe { core::mem::transmute_copy(&min_index) };
|
||||
|
@ -390,7 +403,7 @@ where
|
|||
if min_index.eq(T::TRUE) {
|
||||
None
|
||||
} else {
|
||||
Some(min_index.as_usize())
|
||||
Some(min_index.to_usize())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#![allow(unused_imports)]
|
||||
use super::MaskElement;
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
use core::marker::PhantomData;
|
||||
|
||||
|
@ -109,14 +108,18 @@ where
|
|||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn to_int(self) -> Simd<T, N> {
|
||||
unsafe {
|
||||
intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
|
||||
core::intrinsics::simd::simd_select_bitmask(
|
||||
self.0,
|
||||
Simd::splat(T::TRUE),
|
||||
Simd::splat(T::FALSE),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
|
||||
unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
|
||||
unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
//! Masks that take up full SIMD vector registers.
|
||||
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};
|
||||
|
||||
#[repr(transparent)]
|
||||
|
@ -138,7 +137,7 @@ where
|
|||
U: MaskElement,
|
||||
{
|
||||
// Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
|
||||
unsafe { Mask(intrinsics::simd_cast(self.0)) }
|
||||
unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -150,13 +149,16 @@ where
|
|||
unsafe {
|
||||
// Compute the bitmask
|
||||
let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
|
||||
intrinsics::simd_bitmask(self.0);
|
||||
core::intrinsics::simd::simd_bitmask(self.0);
|
||||
|
||||
// LLVM assumes bit order should match endianness
|
||||
if cfg!(target_endian = "big") {
|
||||
for x in bytes.as_mut() {
|
||||
*x = x.reverse_bits()
|
||||
}
|
||||
if N % 8 > 0 {
|
||||
bytes.as_mut()[N / 8] >>= 8 - N % 8;
|
||||
}
|
||||
}
|
||||
|
||||
bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref());
|
||||
|
@ -180,10 +182,13 @@ where
|
|||
for x in bytes.as_mut() {
|
||||
*x = x.reverse_bits();
|
||||
}
|
||||
if N % 8 > 0 {
|
||||
bytes.as_mut()[N / 8] >>= 8 - N % 8;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the regular mask
|
||||
Self::from_int_unchecked(intrinsics::simd_select_bitmask(
|
||||
Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask(
|
||||
bytes,
|
||||
Self::splat(true).to_int(),
|
||||
Self::splat(false).to_int(),
|
||||
|
@ -199,7 +204,7 @@ where
|
|||
let resized = self.to_int().resize::<M>(T::FALSE);
|
||||
|
||||
// Safety: `resized` is an integer vector with length M, which must match T
|
||||
let bitmask: U = unsafe { intrinsics::simd_bitmask(resized) };
|
||||
let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) };
|
||||
|
||||
// LLVM assumes bit order should match endianness
|
||||
if cfg!(target_endian = "big") {
|
||||
|
@ -223,7 +228,7 @@ where
|
|||
|
||||
// SAFETY: `mask` is the correct bitmask type for a u64 bitmask
|
||||
let mask: Simd<T, M> = unsafe {
|
||||
intrinsics::simd_select_bitmask(
|
||||
core::intrinsics::simd::simd_select_bitmask(
|
||||
bitmask,
|
||||
Simd::<T, M>::splat(T::TRUE),
|
||||
Simd::<T, M>::splat(T::FALSE),
|
||||
|
@ -274,14 +279,14 @@ where
|
|||
#[must_use = "method returns a new bool and does not mutate the original value"]
|
||||
pub fn any(self) -> bool {
|
||||
// Safety: use `self` as an integer vector
|
||||
unsafe { intrinsics::simd_reduce_any(self.to_int()) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn all(self) -> bool {
|
||||
// Safety: use `self` as an integer vector
|
||||
unsafe { intrinsics::simd_reduce_all(self.to_int()) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -306,7 +311,7 @@ where
|
|||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn bitand(self, rhs: Self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
|
||||
unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -320,7 +325,7 @@ where
|
|||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn bitor(self, rhs: Self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
|
||||
unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -334,7 +339,7 @@ where
|
|||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn bitxor(self, rhs: Self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
|
||||
unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
#[macro_use]
|
||||
mod swizzle;
|
||||
|
||||
pub(crate) mod intrinsics;
|
||||
|
||||
mod alias;
|
||||
mod cast;
|
||||
mod fmt;
|
||||
|
@ -27,8 +25,6 @@ pub mod simd {
|
|||
|
||||
pub mod cmp;
|
||||
|
||||
pub(crate) use crate::core_simd::intrinsics;
|
||||
|
||||
pub use crate::core_simd::alias::*;
|
||||
pub use crate::core_simd::cast::*;
|
||||
pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
|
||||
|
|
|
@ -37,7 +37,7 @@ where
|
|||
macro_rules! unsafe_base {
|
||||
($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
|
||||
// Safety: $lhs and $rhs are vectors
|
||||
unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
|
||||
unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ macro_rules! wrap_bitshift {
|
|||
#[allow(clippy::suspicious_arithmetic_impl)]
|
||||
// Safety: $lhs and the bitand result are vectors
|
||||
unsafe {
|
||||
$crate::simd::intrinsics::$simd_call(
|
||||
core::intrinsics::simd::$simd_call(
|
||||
$lhs,
|
||||
$rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
|
||||
)
|
||||
|
@ -97,7 +97,7 @@ macro_rules! int_divrem_guard {
|
|||
$rhs
|
||||
};
|
||||
// Safety: $lhs and rhs are vectors
|
||||
unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
|
||||
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
use core::ops::{Neg, Not}; // unary ops
|
||||
|
||||
|
@ -15,7 +14,7 @@ macro_rules! neg {
|
|||
#[must_use = "operator returns a new vector without mutating the input"]
|
||||
fn neg(self) -> Self::Output {
|
||||
// Safety: `self` is a signed vector
|
||||
unsafe { intrinsics::simd_neg(self) }
|
||||
unsafe { core::intrinsics::simd::simd_neg(self) }
|
||||
}
|
||||
})*
|
||||
}
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
|
||||
|
||||
impl<T, const N: usize> Mask<T, N>
|
||||
|
@ -29,7 +28,7 @@ where
|
|||
{
|
||||
// Safety: The mask has been cast to a vector of integers,
|
||||
// and the operands to select between are vectors of the same type and length.
|
||||
unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
|
||||
unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) }
|
||||
}
|
||||
|
||||
/// Choose elements from two masks.
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use crate::simd::{
|
||||
intrinsics,
|
||||
ptr::{SimdConstPtr, SimdMutPtr},
|
||||
LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
|
||||
};
|
||||
|
@ -31,14 +30,14 @@ macro_rules! impl_number {
|
|||
fn simd_eq(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_ne(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
@ -60,14 +59,14 @@ macro_rules! impl_mask {
|
|||
fn simd_eq(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) }
|
||||
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_ne(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) }
|
||||
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
use crate::simd::{
|
||||
cmp::SimdPartialEq,
|
||||
intrinsics,
|
||||
ptr::{SimdConstPtr, SimdMutPtr},
|
||||
LaneCount, Mask, Simd, SupportedLaneCount,
|
||||
};
|
||||
|
@ -57,28 +56,28 @@ macro_rules! impl_integer {
|
|||
fn simd_lt(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_le(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_gt(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_ge(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -123,28 +122,28 @@ macro_rules! impl_float {
|
|||
fn simd_lt(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_le(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_gt(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_ge(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
|
||||
unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
@ -164,28 +163,28 @@ macro_rules! impl_mask {
|
|||
fn simd_lt(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) }
|
||||
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_le(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) }
|
||||
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_gt(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) }
|
||||
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_ge(self, other: Self) -> Self::Mask {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) }
|
||||
unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{
|
||||
cmp::{SimdPartialEq, SimdPartialOrd},
|
||||
intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount,
|
||||
LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount,
|
||||
};
|
||||
|
||||
/// Operations on SIMD vectors of floats.
|
||||
|
@ -259,7 +259,7 @@ macro_rules! impl_trait {
|
|||
fn cast<T: SimdCast>(self) -> Self::Cast<T>
|
||||
{
|
||||
// Safety: supported types are guaranteed by SimdCast
|
||||
unsafe { intrinsics::simd_as(self) }
|
||||
unsafe { core::intrinsics::simd::simd_as(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -269,7 +269,7 @@ macro_rules! impl_trait {
|
|||
Self::Scalar: core::convert::FloatToInt<I>,
|
||||
{
|
||||
// Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants
|
||||
unsafe { intrinsics::simd_cast(self) }
|
||||
unsafe { core::intrinsics::simd::simd_cast(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -289,7 +289,7 @@ macro_rules! impl_trait {
|
|||
#[inline]
|
||||
fn abs(self) -> Self {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { intrinsics::simd_fabs(self) }
|
||||
unsafe { core::intrinsics::simd::simd_fabs(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -363,13 +363,13 @@ macro_rules! impl_trait {
|
|||
#[inline]
|
||||
fn simd_min(self, other: Self) -> Self {
|
||||
// Safety: `self` and `other` are float vectors
|
||||
unsafe { intrinsics::simd_fmin(self, other) }
|
||||
unsafe { core::intrinsics::simd::simd_fmin(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn simd_max(self, other: Self) -> Self {
|
||||
// Safety: `self` and `other` are floating point vectors
|
||||
unsafe { intrinsics::simd_fmax(self, other) }
|
||||
unsafe { core::intrinsics::simd::simd_fmax(self, other) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -391,7 +391,7 @@ macro_rules! impl_trait {
|
|||
self.as_array().iter().sum()
|
||||
} else {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -402,20 +402,20 @@ macro_rules! impl_trait {
|
|||
self.as_array().iter().product()
|
||||
} else {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1.) }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_max(self) -> Self::Scalar {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { intrinsics::simd_reduce_max(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_max(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_min(self) -> Self::Scalar {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { intrinsics::simd_reduce_min(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_min(self) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{
|
||||
cmp::SimdPartialOrd, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
|
||||
cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
|
||||
SupportedLaneCount,
|
||||
};
|
||||
|
||||
|
@ -237,19 +237,19 @@ macro_rules! impl_trait {
|
|||
#[inline]
|
||||
fn cast<T: SimdCast>(self) -> Self::Cast<T> {
|
||||
// Safety: supported types are guaranteed by SimdCast
|
||||
unsafe { intrinsics::simd_as(self) }
|
||||
unsafe { core::intrinsics::simd::simd_as(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn saturating_add(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { intrinsics::simd_saturating_add(self, second) }
|
||||
unsafe { core::intrinsics::simd::simd_saturating_add(self, second) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn saturating_sub(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { intrinsics::simd_saturating_sub(self, second) }
|
||||
unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -293,55 +293,55 @@ macro_rules! impl_trait {
|
|||
#[inline]
|
||||
fn reduce_sum(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_add_ordered(self, 0) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_product(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_max(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_max(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_max(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_min(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_min(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_min(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_and(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_and(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_and(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_or(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_or(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_or(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_xor(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_xor(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_xor(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn swap_bytes(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_bswap(self) }
|
||||
unsafe { core::intrinsics::simd::simd_bswap(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reverse_bits(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_bitreverse(self) }
|
||||
unsafe { core::intrinsics::simd::simd_bitreverse(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
|
||||
use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
|
||||
|
||||
/// Operations on SIMD vectors of unsigned integers.
|
||||
pub trait SimdUint: Copy + Sealed {
|
||||
|
@ -117,7 +117,7 @@ macro_rules! impl_trait {
|
|||
#[inline]
|
||||
fn cast<T: SimdCast>(self) -> Self::Cast<T> {
|
||||
// Safety: supported types are guaranteed by SimdCast
|
||||
unsafe { intrinsics::simd_as(self) }
|
||||
unsafe { core::intrinsics::simd::simd_as(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -129,79 +129,79 @@ macro_rules! impl_trait {
|
|||
#[inline]
|
||||
fn saturating_add(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { intrinsics::simd_saturating_add(self, second) }
|
||||
unsafe { core::intrinsics::simd::simd_saturating_add(self, second) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn saturating_sub(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { intrinsics::simd_saturating_sub(self, second) }
|
||||
unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_sum(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_add_ordered(self, 0) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_product(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_max(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_max(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_max(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_min(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_min(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_min(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_and(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_and(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_and(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_or(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_or(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_or(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_xor(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_reduce_xor(self) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_xor(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn swap_bytes(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_bswap(self) }
|
||||
unsafe { core::intrinsics::simd::simd_bswap(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reverse_bits(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_bitreverse(self) }
|
||||
unsafe { core::intrinsics::simd::simd_bitreverse(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn leading_zeros(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_ctlz(self) }
|
||||
unsafe { core::intrinsics::simd::simd_ctlz(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trailing_zeros(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { intrinsics::simd_cttz(self) }
|
||||
unsafe { core::intrinsics::simd::simd_cttz(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{
|
||||
cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount,
|
||||
};
|
||||
use crate::simd::{cmp::SimdPartialEq, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount};
|
||||
|
||||
/// Operations on SIMD vectors of constant pointers.
|
||||
pub trait SimdConstPtr: Copy + Sealed {
|
||||
|
@ -103,13 +101,13 @@ where
|
|||
assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);
|
||||
|
||||
// Safety: pointers can be cast
|
||||
unsafe { intrinsics::simd_cast_ptr(self) }
|
||||
unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn cast_mut(self) -> Self::MutPtr {
|
||||
// Safety: pointers can be cast
|
||||
unsafe { intrinsics::simd_cast_ptr(self) }
|
||||
unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -135,19 +133,19 @@ where
|
|||
#[inline]
|
||||
fn expose_addr(self) -> Self::Usize {
|
||||
// Safety: `self` is a pointer vector
|
||||
unsafe { intrinsics::simd_expose_addr(self) }
|
||||
unsafe { core::intrinsics::simd::simd_expose_addr(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_exposed_addr(addr: Self::Usize) -> Self {
|
||||
// Safety: `self` is a pointer vector
|
||||
unsafe { intrinsics::simd_from_exposed_addr(addr) }
|
||||
unsafe { core::intrinsics::simd::simd_from_exposed_addr(addr) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn wrapping_offset(self, count: Self::Isize) -> Self {
|
||||
// Safety: simd_arith_offset takes a vector of pointers and a vector of offsets
|
||||
unsafe { intrinsics::simd_arith_offset(self, count) }
|
||||
unsafe { core::intrinsics::simd::simd_arith_offset(self, count) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{
|
||||
cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount,
|
||||
};
|
||||
use crate::simd::{cmp::SimdPartialEq, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount};
|
||||
|
||||
/// Operations on SIMD vectors of mutable pointers.
|
||||
pub trait SimdMutPtr: Copy + Sealed {
|
||||
|
@ -100,13 +98,13 @@ where
|
|||
assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);
|
||||
|
||||
// Safety: pointers can be cast
|
||||
unsafe { intrinsics::simd_cast_ptr(self) }
|
||||
unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn cast_const(self) -> Self::ConstPtr {
|
||||
// Safety: pointers can be cast
|
||||
unsafe { intrinsics::simd_cast_ptr(self) }
|
||||
unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -132,19 +130,19 @@ where
|
|||
#[inline]
|
||||
fn expose_addr(self) -> Self::Usize {
|
||||
// Safety: `self` is a pointer vector
|
||||
unsafe { intrinsics::simd_expose_addr(self) }
|
||||
unsafe { core::intrinsics::simd::simd_expose_addr(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_exposed_addr(addr: Self::Usize) -> Self {
|
||||
// Safety: `self` is a pointer vector
|
||||
unsafe { intrinsics::simd_from_exposed_addr(addr) }
|
||||
unsafe { core::intrinsics::simd::simd_from_exposed_addr(addr) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn wrapping_offset(self, count: Self::Isize) -> Self {
|
||||
// Safety: simd_arith_offset takes a vector of pointers and a vector of offsets
|
||||
unsafe { intrinsics::simd_arith_offset(self, count) }
|
||||
unsafe { core::intrinsics::simd::simd_arith_offset(self, count) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
|
||||
|
||||
/// Constructs a new SIMD vector by copying elements from selected elements in other vectors.
|
||||
|
@ -88,7 +87,7 @@ pub trait Swizzle<const N: usize> {
|
|||
{
|
||||
// Safety: `vector` is a vector, and the index is a const array of u32.
|
||||
unsafe {
|
||||
intrinsics::simd_shuffle(
|
||||
core::intrinsics::simd::simd_shuffle(
|
||||
vector,
|
||||
vector,
|
||||
const {
|
||||
|
@ -124,7 +123,7 @@ pub trait Swizzle<const N: usize> {
|
|||
{
|
||||
// Safety: `first` and `second` are vectors, and the index is a const array of u32.
|
||||
unsafe {
|
||||
intrinsics::simd_shuffle(
|
||||
core::intrinsics::simd::simd_shuffle(
|
||||
first,
|
||||
second,
|
||||
const {
|
||||
|
|
|
@ -44,7 +44,7 @@ where
|
|||
))]
|
||||
8 => transize(vtbl1_u8, self, idxs),
|
||||
#[cfg(target_feature = "ssse3")]
|
||||
16 => transize(x86::_mm_shuffle_epi8, self, idxs),
|
||||
16 => transize(x86::_mm_shuffle_epi8, self, zeroing_idxs(idxs)),
|
||||
#[cfg(target_feature = "simd128")]
|
||||
16 => transize(wasm::i8x16_swizzle, self, idxs),
|
||||
#[cfg(all(
|
||||
|
@ -54,9 +54,9 @@ where
|
|||
))]
|
||||
16 => transize(vqtbl1q_u8, self, idxs),
|
||||
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
|
||||
32 => transize_raw(avx2_pshufb, self, idxs),
|
||||
32 => transize(avx2_pshufb, self, idxs),
|
||||
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
|
||||
32 => transize(x86::_mm256_permutexvar_epi8, self, idxs),
|
||||
32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self),
|
||||
// Notable absence: avx512bw shuffle
|
||||
// If avx512bw is available, odds of avx512vbmi are good
|
||||
// FIXME: initial AVX512VBMI variant didn't actually pass muster
|
||||
|
@ -129,45 +129,25 @@ unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
|
|||
#[inline(always)]
|
||||
unsafe fn transize<T, const N: usize>(
|
||||
f: unsafe fn(T, T) -> T,
|
||||
bytes: Simd<u8, N>,
|
||||
idxs: Simd<u8, N>,
|
||||
a: Simd<u8, N>,
|
||||
b: Simd<u8, N>,
|
||||
) -> Simd<u8, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
let idxs = zeroing_idxs(idxs);
|
||||
// SAFETY: Same obligation to use this function as to use mem::transmute_copy.
|
||||
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) }
|
||||
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) }
|
||||
}
|
||||
|
||||
/// Make indices that yield 0 for this architecture
|
||||
/// Make indices that yield 0 for x86
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[allow(unused)]
|
||||
#[inline(always)]
|
||||
fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
// On x86, make sure the top bit is set.
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
let idxs = {
|
||||
use crate::simd::cmp::SimdPartialOrd;
|
||||
idxs.simd_lt(Simd::splat(N as u8))
|
||||
.select(idxs, Simd::splat(u8::MAX))
|
||||
};
|
||||
// Simply do nothing on most architectures.
|
||||
idxs
|
||||
}
|
||||
|
||||
/// As transize but no implicit call to `zeroing_idxs`.
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
unsafe fn transize_raw<T, const N: usize>(
|
||||
f: unsafe fn(T, T) -> T,
|
||||
bytes: Simd<u8, N>,
|
||||
idxs: Simd<u8, N>,
|
||||
) -> Simd<u8, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
// SAFETY: Same obligation to use this function as to use mem::transmute_copy.
|
||||
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) }
|
||||
use crate::simd::cmp::SimdPartialOrd;
|
||||
idxs.simd_lt(Simd::splat(N as u8))
|
||||
.select(idxs, Simd::splat(u8::MAX))
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
use crate::simd::{
|
||||
cmp::SimdPartialOrd,
|
||||
intrinsics,
|
||||
ptr::{SimdConstPtr, SimdMutPtr},
|
||||
LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
|
||||
};
|
||||
|
@ -194,7 +193,7 @@ where
|
|||
/// With padding, `read_unaligned` will read past the end of an array of N elements.
|
||||
///
|
||||
/// # Safety
|
||||
/// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`.
|
||||
/// Reading `ptr` must be safe, as if by `<*const [T; N]>::read`.
|
||||
#[inline]
|
||||
const unsafe fn load(ptr: *const [T; N]) -> Self {
|
||||
// There are potentially simpler ways to write this function, but this should result in
|
||||
|
@ -215,7 +214,7 @@ where
|
|||
/// See `load` as to why this function is necessary.
|
||||
///
|
||||
/// # Safety
|
||||
/// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`.
|
||||
/// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write`.
|
||||
#[inline]
|
||||
const unsafe fn store(self, ptr: *mut [T; N]) {
|
||||
// There are potentially simpler ways to write this function, but this should result in
|
||||
|
@ -491,7 +490,7 @@ where
|
|||
or: Self,
|
||||
) -> Self {
|
||||
// Safety: The caller is responsible for upholding all invariants
|
||||
unsafe { intrinsics::simd_gather(or, source, enable.to_int()) }
|
||||
unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) }
|
||||
}
|
||||
|
||||
/// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
|
||||
|
@ -650,7 +649,7 @@ where
|
|||
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
|
||||
pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask<isize, N>) {
|
||||
// Safety: The caller is responsible for upholding all invariants
|
||||
unsafe { intrinsics::simd_scatter(self, dest, enable.to_int()) }
|
||||
unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -692,7 +691,8 @@ where
|
|||
fn eq(&self, other: &Self) -> bool {
|
||||
// Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.
|
||||
let mask = unsafe {
|
||||
let tfvec: Simd<<T as SimdElement>::Mask, N> = intrinsics::simd_eq(*self, *other);
|
||||
let tfvec: Simd<<T as SimdElement>::Mask, N> =
|
||||
core::intrinsics::simd::simd_eq(*self, *other);
|
||||
Mask::from_int_unchecked(tfvec)
|
||||
};
|
||||
|
||||
|
@ -705,7 +705,8 @@ where
|
|||
fn ne(&self, other: &Self) -> bool {
|
||||
// Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.
|
||||
let mask = unsafe {
|
||||
let tfvec: Simd<<T as SimdElement>::Mask, N> = intrinsics::simd_ne(*self, *other);
|
||||
let tfvec: Simd<<T as SimdElement>::Mask, N> =
|
||||
core::intrinsics::simd::simd_ne(*self, *other);
|
||||
Mask::from_int_unchecked(tfvec)
|
||||
};
|
||||
|
||||
|
|
|
@ -99,6 +99,19 @@ macro_rules! test_mask_api {
|
|||
assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
|
||||
}
|
||||
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
#[test]
|
||||
fn roundtrip_bitmask_conversion_odd() {
|
||||
let values = [
|
||||
true, false, true, false, true, true, false, false, false, true, true,
|
||||
];
|
||||
let mask = Mask::<$type, 11>::from_array(values);
|
||||
let bitmask = mask.to_bitmask();
|
||||
assert_eq!(bitmask, 0b11000110101);
|
||||
assert_eq!(Mask::<$type, 11>::from_bitmask(bitmask), mask);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn cast() {
|
||||
fn cast_impl<T: core_simd::simd::MaskElement>()
|
||||
|
@ -134,6 +147,35 @@ macro_rules! test_mask_api {
|
|||
assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]);
|
||||
assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
|
||||
}
|
||||
|
||||
// rust-lang/portable-simd#379
|
||||
#[test]
|
||||
fn roundtrip_bitmask_vector_conversion_small() {
|
||||
use core_simd::simd::ToBytes;
|
||||
let values = [
|
||||
true, false, true, true
|
||||
];
|
||||
let mask = Mask::<$type, 4>::from_array(values);
|
||||
let bitmask = mask.to_bitmask_vector();
|
||||
assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101);
|
||||
assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask);
|
||||
}
|
||||
|
||||
/* FIXME doesn't work with non-powers-of-two, yet
|
||||
// rust-lang/portable-simd#379
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
#[test]
|
||||
fn roundtrip_bitmask_vector_conversion_odd() {
|
||||
use core_simd::simd::ToBytes;
|
||||
let values = [
|
||||
true, false, true, false, true, true, false, false, false, true, true,
|
||||
];
|
||||
let mask = Mask::<$type, 11>::from_array(values);
|
||||
let bitmask = mask.to_bitmask_vector();
|
||||
assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]);
|
||||
assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#![cfg_attr(feature = "as_crate", no_std)] // We are std!
|
||||
#![cfg_attr(
|
||||
feature = "as_crate",
|
||||
feature(platform_intrinsics),
|
||||
feature(core_intrinsics),
|
||||
feature(portable_simd),
|
||||
allow(internal_features)
|
||||
)]
|
||||
|
@ -10,6 +10,8 @@ use core::simd;
|
|||
#[cfg(feature = "as_crate")]
|
||||
use core_simd::simd;
|
||||
|
||||
use core::intrinsics::simd as intrinsics;
|
||||
|
||||
use simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
|
||||
#[cfg(feature = "as_crate")]
|
||||
|
@ -22,28 +24,6 @@ use experimental as sealed;
|
|||
|
||||
use crate::sealed::Sealed;
|
||||
|
||||
// "platform intrinsics" are essentially "codegen intrinsics"
|
||||
// each of these may be scalarized and lowered to a libm call
|
||||
extern "platform-intrinsic" {
|
||||
// ceil
|
||||
fn simd_ceil<T>(x: T) -> T;
|
||||
|
||||
// floor
|
||||
fn simd_floor<T>(x: T) -> T;
|
||||
|
||||
// round
|
||||
fn simd_round<T>(x: T) -> T;
|
||||
|
||||
// trunc
|
||||
fn simd_trunc<T>(x: T) -> T;
|
||||
|
||||
// fsqrt
|
||||
fn simd_fsqrt<T>(x: T) -> T;
|
||||
|
||||
// fma
|
||||
fn simd_fma<T>(x: T, y: T, z: T) -> T;
|
||||
}
|
||||
|
||||
/// This trait provides a possibly-temporary implementation of float functions
|
||||
/// that may, in the absence of hardware support, canonicalize to calling an
|
||||
/// operating system's `math.h` dynamically-loaded library (also known as a
|
||||
|
@ -74,7 +54,7 @@ pub trait StdFloat: Sealed + Sized {
|
|||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
fn mul_add(self, a: Self, b: Self) -> Self {
|
||||
unsafe { simd_fma(self, a, b) }
|
||||
unsafe { intrinsics::simd_fma(self, a, b) }
|
||||
}
|
||||
|
||||
/// Produces a vector where every lane has the square root value
|
||||
|
@ -82,35 +62,35 @@ pub trait StdFloat: Sealed + Sized {
|
|||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
fn sqrt(self) -> Self {
|
||||
unsafe { simd_fsqrt(self) }
|
||||
unsafe { intrinsics::simd_fsqrt(self) }
|
||||
}
|
||||
|
||||
/// Returns the smallest integer greater than or equal to each lane.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn ceil(self) -> Self {
|
||||
unsafe { simd_ceil(self) }
|
||||
unsafe { intrinsics::simd_ceil(self) }
|
||||
}
|
||||
|
||||
/// Returns the largest integer value less than or equal to each lane.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn floor(self) -> Self {
|
||||
unsafe { simd_floor(self) }
|
||||
unsafe { intrinsics::simd_floor(self) }
|
||||
}
|
||||
|
||||
/// Rounds to the nearest integer value. Ties round toward zero.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn round(self) -> Self {
|
||||
unsafe { simd_round(self) }
|
||||
unsafe { intrinsics::simd_round(self) }
|
||||
}
|
||||
|
||||
/// Returns the floating point's integer value, with its fractional part removed.
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[inline]
|
||||
fn trunc(self) -> Self {
|
||||
unsafe { simd_trunc(self) }
|
||||
unsafe { intrinsics::simd_trunc(self) }
|
||||
}
|
||||
|
||||
/// Returns the floating point's fractional value, with its integer part removed.
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#![feature(stdsimd, powerpc_target_feature)]
|
||||
#![feature(powerpc_target_feature)]
|
||||
#![cfg_attr(
|
||||
any(target_arch = "powerpc", target_arch = "powerpc64"),
|
||||
feature(stdarch_powerpc)
|
||||
)]
|
||||
|
||||
pub mod array;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue