Use new bitmask intrinsics with byte arrays
This commit is contained in:
parent
1ce1c645cf
commit
36e198b97a
3 changed files with 9 additions and 39 deletions
|
@ -15,34 +15,25 @@ impl<const LANES: usize> LaneCount<LANES> {
|
|||
pub trait SupportedLaneCount: Sealed {
|
||||
#[doc(hidden)]
|
||||
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
|
||||
|
||||
#[doc(hidden)]
|
||||
type IntBitMask;
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Sealed for LaneCount<LANES> {}
|
||||
|
||||
impl SupportedLaneCount for LaneCount<1> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<2> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<4> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<8> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<16> {
|
||||
type BitMask = [u8; 2];
|
||||
type IntBitMask = u16;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<32> {
|
||||
type BitMask = [u8; 4];
|
||||
type IntBitMask = u32;
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#![allow(unused_imports)]
|
||||
use super::MaskElement;
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
|
@ -101,24 +102,17 @@ where
|
|||
#[inline]
|
||||
pub fn to_int(self) -> Simd<T, LANES> {
|
||||
unsafe {
|
||||
let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
core::mem::transmute_copy(&self);
|
||||
intrinsics::simd_select_bitmask(mask, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
|
||||
crate::intrinsics::simd_select_bitmask(
|
||||
self.0,
|
||||
Simd::splat(T::TRUE),
|
||||
Simd::splat(T::FALSE),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::BitMask>(),
|
||||
core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::IntBitMask>(),
|
||||
);
|
||||
unsafe {
|
||||
let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
intrinsics::simd_bitmask(value);
|
||||
Self(core::mem::transmute_copy(&mask), PhantomData)
|
||||
}
|
||||
unsafe { Self(crate::intrinsics::simd_bitmask(value), PhantomData) }
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
|
|
|
@ -106,15 +106,8 @@ where
|
|||
#[inline]
|
||||
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
|
||||
unsafe {
|
||||
// TODO remove the transmute when rustc can use arrays of u8 as bitmasks
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::IntBitMask>(),
|
||||
LaneCount::<LANES>::BITMASK_LEN,
|
||||
);
|
||||
let bitmask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
intrinsics::simd_bitmask(self.0);
|
||||
let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
|
||||
core::mem::transmute_copy(&bitmask);
|
||||
crate::intrinsics::simd_bitmask(self.0);
|
||||
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
|
@ -142,15 +135,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
// TODO remove the transmute when rustc can use arrays of u8 as bitmasks
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<LaneCount::<LANES> as SupportedLaneCount>::IntBitMask>(),
|
||||
LaneCount::<LANES>::BITMASK_LEN,
|
||||
);
|
||||
let bitmask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
core::mem::transmute_copy(&bitmask);
|
||||
|
||||
Self::from_int_unchecked(intrinsics::simd_select_bitmask(
|
||||
Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask(
|
||||
bitmask,
|
||||
Self::splat(true).to_int(),
|
||||
Self::splat(false).to_int(),
|
||||
|
|
Loading…
Add table
Reference in a new issue