Merge commit 'cff979eec1ac0473fc4960ee6cde462c6aeda824' into sync-portable-simd-2024-03-22
This commit is contained in:
commit
9e0ec251d5
13 changed files with 505 additions and 52 deletions
|
@ -141,6 +141,11 @@ jobs:
|
||||||
- name: Test (release)
|
- name: Test (release)
|
||||||
run: cargo test --verbose --target=${{ matrix.target }} --release
|
run: cargo test --verbose --target=${{ matrix.target }} --release
|
||||||
|
|
||||||
|
- name: Generate docs
|
||||||
|
run: cargo doc --verbose --target=${{ matrix.target }}
|
||||||
|
env:
|
||||||
|
RUSTDOCFLAGS: -Dwarnings
|
||||||
|
|
||||||
wasm-tests:
|
wasm-tests:
|
||||||
name: "wasm (firefox, ${{ matrix.name }})"
|
name: "wasm (firefox, ${{ matrix.name }})"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
|
@ -177,6 +177,9 @@ name = "std_float"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"core_simd",
|
"core_simd",
|
||||||
|
"test_helpers",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-test",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -13,11 +13,12 @@
|
||||||
simd_ffi,
|
simd_ffi,
|
||||||
staged_api,
|
staged_api,
|
||||||
strict_provenance,
|
strict_provenance,
|
||||||
|
prelude_import,
|
||||||
ptr_metadata
|
ptr_metadata
|
||||||
)]
|
)]
|
||||||
#![cfg_attr(
|
#![cfg_attr(
|
||||||
all(
|
all(
|
||||||
any(target_arch = "aarch64", target_arch = "arm",),
|
any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",),
|
||||||
any(
|
any(
|
||||||
all(target_feature = "v6", not(target_feature = "mclass")),
|
all(target_feature = "v6", not(target_feature = "mclass")),
|
||||||
all(target_feature = "mclass", target_feature = "dsp"),
|
all(target_feature = "mclass", target_feature = "dsp"),
|
||||||
|
@ -33,12 +34,21 @@
|
||||||
any(target_arch = "powerpc", target_arch = "powerpc64"),
|
any(target_arch = "powerpc", target_arch = "powerpc64"),
|
||||||
feature(stdarch_powerpc)
|
feature(stdarch_powerpc)
|
||||||
)]
|
)]
|
||||||
|
#![cfg_attr(
|
||||||
|
all(target_arch = "x86_64", target_feature = "avx512f"),
|
||||||
|
feature(stdarch_x86_avx512)
|
||||||
|
)]
|
||||||
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
|
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
|
||||||
#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
|
#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
|
||||||
|
#![doc(test(attr(deny(warnings))))]
|
||||||
#![allow(internal_features)]
|
#![allow(internal_features)]
|
||||||
#![unstable(feature = "portable_simd", issue = "86656")]
|
#![unstable(feature = "portable_simd", issue = "86656")]
|
||||||
//! Portable SIMD module.
|
//! Portable SIMD module.
|
||||||
|
|
||||||
|
#[prelude_import]
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use core::prelude::v1::*;
|
||||||
|
|
||||||
#[path = "mod.rs"]
|
#[path = "mod.rs"]
|
||||||
mod core_simd;
|
mod core_simd;
|
||||||
pub use self::core_simd::simd;
|
pub use self::core_simd::simd;
|
||||||
|
|
|
@ -34,6 +34,7 @@ mod sealed {
|
||||||
fn eq(self, other: Self) -> bool;
|
fn eq(self, other: Self) -> bool;
|
||||||
|
|
||||||
fn to_usize(self) -> usize;
|
fn to_usize(self) -> usize;
|
||||||
|
fn max_unsigned() -> u64;
|
||||||
|
|
||||||
type Unsigned: SimdElement;
|
type Unsigned: SimdElement;
|
||||||
|
|
||||||
|
@ -78,6 +79,11 @@ macro_rules! impl_element {
|
||||||
self as usize
|
self as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn max_unsigned() -> u64 {
|
||||||
|
<$unsigned>::MAX as u64
|
||||||
|
}
|
||||||
|
|
||||||
type Unsigned = $unsigned;
|
type Unsigned = $unsigned;
|
||||||
|
|
||||||
const TRUE: Self = -1;
|
const TRUE: Self = -1;
|
||||||
|
|
|
@ -16,7 +16,10 @@ where
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
|
pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
|
||||||
#![allow(unused_imports, unused_unsafe)]
|
#![allow(unused_imports, unused_unsafe)]
|
||||||
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
|
#[cfg(all(
|
||||||
|
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||||
|
target_endian = "little"
|
||||||
|
))]
|
||||||
use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8};
|
use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8};
|
||||||
#[cfg(all(
|
#[cfg(all(
|
||||||
target_arch = "arm",
|
target_arch = "arm",
|
||||||
|
@ -37,6 +40,7 @@ where
|
||||||
#[cfg(all(
|
#[cfg(all(
|
||||||
any(
|
any(
|
||||||
target_arch = "aarch64",
|
target_arch = "aarch64",
|
||||||
|
target_arch = "arm64ec",
|
||||||
all(target_arch = "arm", target_feature = "v7")
|
all(target_arch = "arm", target_feature = "v7")
|
||||||
),
|
),
|
||||||
target_feature = "neon",
|
target_feature = "neon",
|
||||||
|
@ -48,7 +52,7 @@ where
|
||||||
#[cfg(target_feature = "simd128")]
|
#[cfg(target_feature = "simd128")]
|
||||||
16 => transize(wasm::i8x16_swizzle, self, idxs),
|
16 => transize(wasm::i8x16_swizzle, self, idxs),
|
||||||
#[cfg(all(
|
#[cfg(all(
|
||||||
target_arch = "aarch64",
|
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||||
target_feature = "neon",
|
target_feature = "neon",
|
||||||
target_endian = "little"
|
target_endian = "little"
|
||||||
))]
|
))]
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use crate::simd::{
|
use crate::simd::{
|
||||||
cmp::SimdPartialOrd,
|
cmp::SimdPartialOrd,
|
||||||
|
num::SimdUint,
|
||||||
ptr::{SimdConstPtr, SimdMutPtr},
|
ptr::{SimdConstPtr, SimdMutPtr},
|
||||||
LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
|
LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
|
||||||
};
|
};
|
||||||
|
@ -262,6 +263,7 @@ where
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Panics if the slice's length is less than the vector's `Simd::N`.
|
/// Panics if the slice's length is less than the vector's `Simd::N`.
|
||||||
|
/// Use `load_or_default` for an alternative that does not panic.
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
///
|
||||||
|
@ -315,6 +317,143 @@ where
|
||||||
unsafe { self.store(slice.as_mut_ptr().cast()) }
|
unsafe { self.store(slice.as_mut_ptr().cast()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for
|
||||||
|
/// the `slice`. Otherwise, the default value for the element type is returned.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// # #![feature(portable_simd)]
|
||||||
|
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||||
|
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||||
|
/// # use simd::Simd;
|
||||||
|
/// let vec: Vec<i32> = vec![10, 11];
|
||||||
|
///
|
||||||
|
/// let result = Simd::<i32, 4>::load_or_default(&vec);
|
||||||
|
/// assert_eq!(result, Simd::from_array([10, 11, 0, 0]));
|
||||||
|
/// ```
|
||||||
|
#[must_use]
|
||||||
|
#[inline]
|
||||||
|
pub fn load_or_default(slice: &[T]) -> Self
|
||||||
|
where
|
||||||
|
T: Default,
|
||||||
|
{
|
||||||
|
Self::load_or(slice, Default::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for
|
||||||
|
/// the `slice`. Otherwise, the corresponding value from `or` is passed through.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// # #![feature(portable_simd)]
|
||||||
|
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||||
|
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||||
|
/// # use simd::Simd;
|
||||||
|
/// let vec: Vec<i32> = vec![10, 11];
|
||||||
|
/// let or = Simd::from_array([-5, -4, -3, -2]);
|
||||||
|
///
|
||||||
|
/// let result = Simd::load_or(&vec, or);
|
||||||
|
/// assert_eq!(result, Simd::from_array([10, 11, -3, -2]));
|
||||||
|
/// ```
|
||||||
|
#[must_use]
|
||||||
|
#[inline]
|
||||||
|
pub fn load_or(slice: &[T], or: Self) -> Self {
|
||||||
|
Self::load_select(slice, Mask::splat(true), or)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads contiguous elements from `slice`. Each element is read from memory if its
|
||||||
|
/// corresponding element in `enable` is `true`.
|
||||||
|
///
|
||||||
|
/// When the element is disabled or out of bounds for the slice, that memory location
|
||||||
|
/// is not accessed and the default value for the element type is returned instead.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// # #![feature(portable_simd)]
|
||||||
|
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||||
|
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||||
|
/// # use simd::{Simd, Mask};
|
||||||
|
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||||
|
/// let enable = Mask::from_array([true, true, false, true]);
|
||||||
|
///
|
||||||
|
/// let result = Simd::<i32, 4>::load_select_or_default(&vec, enable);
|
||||||
|
/// assert_eq!(result, Simd::from_array([10, 11, 0, 13]));
|
||||||
|
/// ```
|
||||||
|
#[must_use]
|
||||||
|
#[inline]
|
||||||
|
pub fn load_select_or_default(slice: &[T], enable: Mask<<T as SimdElement>::Mask, N>) -> Self
|
||||||
|
where
|
||||||
|
T: Default,
|
||||||
|
{
|
||||||
|
Self::load_select(slice, enable, Default::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads contiguous elements from `slice`. Each element is read from memory if its
|
||||||
|
/// corresponding element in `enable` is `true`.
|
||||||
|
///
|
||||||
|
/// When the element is disabled or out of bounds for the slice, that memory location
|
||||||
|
/// is not accessed and the corresponding value from `or` is passed through.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// # #![feature(portable_simd)]
|
||||||
|
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||||
|
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||||
|
/// # use simd::{Simd, Mask};
|
||||||
|
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||||
|
/// let enable = Mask::from_array([true, true, false, true]);
|
||||||
|
/// let or = Simd::from_array([-5, -4, -3, -2]);
|
||||||
|
///
|
||||||
|
/// let result = Simd::load_select(&vec, enable, or);
|
||||||
|
/// assert_eq!(result, Simd::from_array([10, 11, -3, 13]));
|
||||||
|
/// ```
|
||||||
|
#[must_use]
|
||||||
|
#[inline]
|
||||||
|
pub fn load_select(
|
||||||
|
slice: &[T],
|
||||||
|
mut enable: Mask<<T as SimdElement>::Mask, N>,
|
||||||
|
or: Self,
|
||||||
|
) -> Self {
|
||||||
|
enable &= mask_up_to(slice.len());
|
||||||
|
// SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to
|
||||||
|
// the element.
|
||||||
|
unsafe { Self::load_select_ptr(slice.as_ptr(), enable, or) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads contiguous elements from `slice`. Each element is read from memory if its
|
||||||
|
/// corresponding element in `enable` is `true`.
|
||||||
|
///
|
||||||
|
/// When the element is disabled, that memory location is not accessed and the corresponding
|
||||||
|
/// value from `or` is passed through.
|
||||||
|
#[must_use]
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn load_select_unchecked(
|
||||||
|
slice: &[T],
|
||||||
|
enable: Mask<<T as SimdElement>::Mask, N>,
|
||||||
|
or: Self,
|
||||||
|
) -> Self {
|
||||||
|
let ptr = slice.as_ptr();
|
||||||
|
// SAFETY: The safety of reading elements from `slice` is ensured by the caller.
|
||||||
|
unsafe { Self::load_select_ptr(ptr, enable, or) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads contiguous elements starting at `ptr`. Each element is read from memory if its
|
||||||
|
/// corresponding element in `enable` is `true`.
|
||||||
|
///
|
||||||
|
/// When the element is disabled, that memory location is not accessed and the corresponding
|
||||||
|
/// value from `or` is passed through.
|
||||||
|
#[must_use]
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn load_select_ptr(
|
||||||
|
ptr: *const T,
|
||||||
|
enable: Mask<<T as SimdElement>::Mask, N>,
|
||||||
|
or: Self,
|
||||||
|
) -> Self {
|
||||||
|
// SAFETY: The safety of reading elements through `ptr` is ensured by the caller.
|
||||||
|
unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) }
|
||||||
|
}
|
||||||
|
|
||||||
/// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
|
/// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
|
||||||
/// If an index is out-of-bounds, the element is instead selected from the `or` vector.
|
/// If an index is out-of-bounds, the element is instead selected from the `or` vector.
|
||||||
///
|
///
|
||||||
|
@ -493,6 +632,77 @@ where
|
||||||
unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) }
|
unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Conditionally write contiguous elements to `slice`. The `enable` mask controls
|
||||||
|
/// which elements are written, as long as they're in-bounds of the `slice`.
|
||||||
|
/// If the element is disabled or out of bounds, no memory access to that location
|
||||||
|
/// is made.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// # #![feature(portable_simd)]
|
||||||
|
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||||
|
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||||
|
/// # use simd::{Simd, Mask};
|
||||||
|
/// let mut arr = [0i32; 4];
|
||||||
|
/// let write = Simd::from_array([-5, -4, -3, -2]);
|
||||||
|
/// let enable = Mask::from_array([false, true, true, true]);
|
||||||
|
///
|
||||||
|
/// write.store_select(&mut arr[..3], enable);
|
||||||
|
/// assert_eq!(arr, [0, -4, -3, 0]);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn store_select(self, slice: &mut [T], mut enable: Mask<<T as SimdElement>::Mask, N>) {
|
||||||
|
enable &= mask_up_to(slice.len());
|
||||||
|
// SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to
|
||||||
|
// the element.
|
||||||
|
unsafe { self.store_select_ptr(slice.as_mut_ptr(), enable) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Conditionally write contiguous elements to `slice`. The `enable` mask controls
|
||||||
|
/// which elements are written.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Every enabled element must be in bounds for the `slice`.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// # #![feature(portable_simd)]
|
||||||
|
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||||
|
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||||
|
/// # use simd::{Simd, Mask};
|
||||||
|
/// let mut arr = [0i32; 4];
|
||||||
|
/// let write = Simd::from_array([-5, -4, -3, -2]);
|
||||||
|
/// let enable = Mask::from_array([false, true, true, true]);
|
||||||
|
///
|
||||||
|
/// unsafe { write.store_select_unchecked(&mut arr, enable) };
|
||||||
|
/// assert_eq!(arr, [0, -4, -3, -2]);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn store_select_unchecked(
|
||||||
|
self,
|
||||||
|
slice: &mut [T],
|
||||||
|
enable: Mask<<T as SimdElement>::Mask, N>,
|
||||||
|
) {
|
||||||
|
let ptr = slice.as_mut_ptr();
|
||||||
|
// SAFETY: The safety of writing elements in `slice` is ensured by the caller.
|
||||||
|
unsafe { self.store_select_ptr(ptr, enable) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Conditionally write contiguous elements starting from `ptr`.
|
||||||
|
/// The `enable` mask controls which elements are written.
|
||||||
|
/// When disabled, the memory location corresponding to that element is not accessed.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Memory addresses for each element are calculated with [`pointer::wrapping_offset`] and
|
||||||
|
/// each enabled element must satisfy the same conditions as [`core::ptr::write`].
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<<T as SimdElement>::Mask, N>) {
|
||||||
|
// SAFETY: The safety of writing elements through `ptr` is ensured by the caller.
|
||||||
|
unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) }
|
||||||
|
}
|
||||||
|
|
||||||
/// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
|
/// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
|
||||||
/// If an index is out-of-bounds, the write is suppressed without panicking.
|
/// If an index is out-of-bounds, the write is suppressed without panicking.
|
||||||
/// If two elements in the scattered vector would write to the same index
|
/// If two elements in the scattered vector would write to the same index
|
||||||
|
@ -980,3 +1190,37 @@ where
|
||||||
{
|
{
|
||||||
type Mask = isize;
|
type Mask = isize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn lane_indices<const N: usize>() -> Simd<usize, N>
|
||||||
|
where
|
||||||
|
LaneCount<N>: SupportedLaneCount,
|
||||||
|
{
|
||||||
|
let mut index = [0; N];
|
||||||
|
for i in 0..N {
|
||||||
|
index[i] = i;
|
||||||
|
}
|
||||||
|
Simd::from_array(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn mask_up_to<M, const N: usize>(len: usize) -> Mask<M, N>
|
||||||
|
where
|
||||||
|
LaneCount<N>: SupportedLaneCount,
|
||||||
|
M: MaskElement,
|
||||||
|
{
|
||||||
|
let index = lane_indices::<N>();
|
||||||
|
let max_value: u64 = M::max_unsigned();
|
||||||
|
macro_rules! case {
|
||||||
|
($ty:ty) => {
|
||||||
|
if N < <$ty>::MAX as usize && max_value as $ty as u64 == max_value {
|
||||||
|
return index.cast().simd_lt(Simd::splat(len.min(N) as $ty)).cast();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
case!(u8);
|
||||||
|
case!(u16);
|
||||||
|
case!(u32);
|
||||||
|
case!(u64);
|
||||||
|
index.simd_lt(Simd::splat(len)).cast()
|
||||||
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ mod x86;
|
||||||
#[cfg(target_arch = "wasm32")]
|
#[cfg(target_arch = "wasm32")]
|
||||||
mod wasm32;
|
mod wasm32;
|
||||||
|
|
||||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
|
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",))]
|
||||||
mod arm;
|
mod arm;
|
||||||
|
|
||||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||||
|
|
|
@ -4,12 +4,13 @@ use crate::simd::*;
|
||||||
#[cfg(target_arch = "arm")]
|
#[cfg(target_arch = "arm")]
|
||||||
use core::arch::arm::*;
|
use core::arch::arm::*;
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||||
use core::arch::aarch64::*;
|
use core::arch::aarch64::*;
|
||||||
|
|
||||||
#[cfg(all(
|
#[cfg(all(
|
||||||
any(
|
any(
|
||||||
target_arch = "aarch64",
|
target_arch = "aarch64",
|
||||||
|
target_arch = "arm64ec",
|
||||||
all(target_arch = "arm", target_feature = "v7"),
|
all(target_arch = "arm", target_feature = "v7"),
|
||||||
),
|
),
|
||||||
target_endian = "little"
|
target_endian = "little"
|
||||||
|
@ -69,7 +70,10 @@ mod simd32 {
|
||||||
from_transmute! { unsafe Simd<i8, 4> => int8x4_t }
|
from_transmute! { unsafe Simd<i8, 4> => int8x4_t }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
#[cfg(all(
|
||||||
|
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||||
|
target_endian = "little"
|
||||||
|
))]
|
||||||
mod aarch64 {
|
mod aarch64 {
|
||||||
use super::neon::*;
|
use super::neon::*;
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
#![feature(portable_simd)]
|
||||||
|
use core_simd::simd::prelude::*;
|
||||||
|
|
||||||
|
#[cfg(target_arch = "wasm32")]
|
||||||
|
use wasm_bindgen_test::*;
|
||||||
|
|
||||||
|
#[cfg(target_arch = "wasm32")]
|
||||||
|
wasm_bindgen_test_configure!(run_in_browser);
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||||
|
fn masked_load_store() {
|
||||||
|
let mut arr = [u8::MAX; 7];
|
||||||
|
|
||||||
|
u8x4::splat(0).store_select(&mut arr[5..], Mask::from_array([false, true, false, true]));
|
||||||
|
// write to index 8 is OOB and dropped
|
||||||
|
assert_eq!(arr, [255u8, 255, 255, 255, 255, 255, 0]);
|
||||||
|
|
||||||
|
u8x4::from_array([0, 1, 2, 3]).store_select(&mut arr[1..], Mask::splat(true));
|
||||||
|
assert_eq!(arr, [255u8, 0, 1, 2, 3, 255, 0]);
|
||||||
|
|
||||||
|
// read from index 7 is OOB and dropped
|
||||||
|
assert_eq!(
|
||||||
|
u8x4::load_or(&arr[4..], u8x4::splat(42)),
|
||||||
|
u8x4::from_array([3, 255, 0, 42])
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
u8x4::load_select(
|
||||||
|
&arr[4..],
|
||||||
|
Mask::from_array([true, false, true, true]),
|
||||||
|
u8x4::splat(42)
|
||||||
|
),
|
||||||
|
u8x4::from_array([3, 42, 0, 42])
|
||||||
|
);
|
||||||
|
}
|
|
@ -1,6 +1,6 @@
|
||||||
#![feature(portable_simd)]
|
#![feature(portable_simd)]
|
||||||
use core::{fmt, ops::RangeInclusive};
|
use core::{fmt, ops::RangeInclusive};
|
||||||
use test_helpers::{self, biteq, make_runner, prop_assert_biteq};
|
use test_helpers::{biteq, make_runner, prop_assert_biteq};
|
||||||
|
|
||||||
fn swizzle_dyn_scalar_ver<const N: usize>(values: [u8; N], idxs: [u8; N]) -> [u8; N] {
|
fn swizzle_dyn_scalar_ver<const N: usize>(values: [u8; N], idxs: [u8; N]) -> [u8; N] {
|
||||||
let mut array = [0; N];
|
let mut array = [0; N];
|
||||||
|
|
|
@ -8,6 +8,13 @@ edition = "2021"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
core_simd = { path = "../core_simd", default-features = false }
|
core_simd = { path = "../core_simd", default-features = false }
|
||||||
|
|
||||||
|
[dev-dependencies.test_helpers]
|
||||||
|
path = "../test_helpers"
|
||||||
|
|
||||||
|
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
|
||||||
|
wasm-bindgen = "0.2"
|
||||||
|
wasm-bindgen-test = "0.3"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["as_crate"]
|
default = ["as_crate"]
|
||||||
as_crate = []
|
as_crate = []
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#![cfg_attr(feature = "as_crate", no_std)] // We are std!
|
|
||||||
#![cfg_attr(
|
#![cfg_attr(
|
||||||
feature = "as_crate",
|
feature = "as_crate",
|
||||||
feature(core_intrinsics),
|
feature(core_intrinsics),
|
||||||
|
@ -44,7 +43,7 @@ use crate::sealed::Sealed;
|
||||||
/// For now this trait is available to permit experimentation with SIMD float
|
/// For now this trait is available to permit experimentation with SIMD float
|
||||||
/// operations that may lack hardware support, such as `mul_add`.
|
/// operations that may lack hardware support, such as `mul_add`.
|
||||||
pub trait StdFloat: Sealed + Sized {
|
pub trait StdFloat: Sealed + Sized {
|
||||||
/// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
|
/// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error,
|
||||||
/// yielding a more accurate result than an unfused multiply-add.
|
/// yielding a more accurate result than an unfused multiply-add.
|
||||||
///
|
///
|
||||||
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
|
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
|
||||||
|
@ -57,22 +56,65 @@ pub trait StdFloat: Sealed + Sized {
|
||||||
unsafe { intrinsics::simd_fma(self, a, b) }
|
unsafe { intrinsics::simd_fma(self, a, b) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produces a vector where every lane has the square root value
|
/// Produces a vector where every element has the square root value
|
||||||
/// of the equivalently-indexed lane in `self`
|
/// of the equivalently-indexed element in `self`
|
||||||
#[inline]
|
#[inline]
|
||||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
fn sqrt(self) -> Self {
|
fn sqrt(self) -> Self {
|
||||||
unsafe { intrinsics::simd_fsqrt(self) }
|
unsafe { intrinsics::simd_fsqrt(self) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the smallest integer greater than or equal to each lane.
|
/// Produces a vector where every element has the sine of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn sin(self) -> Self;
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the cosine of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn cos(self) -> Self;
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the exponential (base e) of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn exp(self) -> Self;
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the exponential (base 2) of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn exp2(self) -> Self;
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the natural logarithm of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn ln(self) -> Self;
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the logarithm of the value in the
|
||||||
|
/// equivalently-indexed element of `self`, with the base given by the same element of `base`.
|
||||||
|
#[inline]
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn log(self, base: Self) -> Self {
|
||||||
|
unsafe { intrinsics::simd_div(self.ln(), base.ln()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the base-2 logarithm of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn log2(self) -> Self;
|
||||||
|
|
||||||
|
/// Produces a vector where every element has the base-10 logarithm of the value
|
||||||
|
/// in the equivalently-indexed element in `self`.
|
||||||
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
|
fn log10(self) -> Self;
|
||||||
|
|
||||||
|
/// Returns the smallest integer greater than or equal to each element.
|
||||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
#[inline]
|
#[inline]
|
||||||
fn ceil(self) -> Self {
|
fn ceil(self) -> Self {
|
||||||
unsafe { intrinsics::simd_ceil(self) }
|
unsafe { intrinsics::simd_ceil(self) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the largest integer value less than or equal to each lane.
|
/// Returns the largest integer value less than or equal to each element.
|
||||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||||
#[inline]
|
#[inline]
|
||||||
fn floor(self) -> Self {
|
fn floor(self) -> Self {
|
||||||
|
@ -101,46 +143,65 @@ pub trait StdFloat: Sealed + Sized {
|
||||||
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
|
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
|
||||||
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
|
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
|
||||||
|
|
||||||
// We can safely just use all the defaults.
|
macro_rules! impl_float {
|
||||||
impl<const N: usize> StdFloat for Simd<f32, N>
|
{
|
||||||
where
|
$($fn:ident: $intrinsic:ident,)*
|
||||||
LaneCount<N>: SupportedLaneCount,
|
} => {
|
||||||
{
|
impl<const N: usize> StdFloat for Simd<f32, N>
|
||||||
/// Returns the floating point's fractional value, with its integer part removed.
|
where
|
||||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
LaneCount<N>: SupportedLaneCount,
|
||||||
#[inline]
|
{
|
||||||
fn fract(self) -> Self {
|
#[inline]
|
||||||
self - self.trunc()
|
fn fract(self) -> Self {
|
||||||
|
self - self.trunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
$(
|
||||||
|
#[inline]
|
||||||
|
fn $fn(self) -> Self {
|
||||||
|
unsafe { intrinsics::$intrinsic(self) }
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> StdFloat for Simd<f64, N>
|
||||||
|
where
|
||||||
|
LaneCount<N>: SupportedLaneCount,
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn fract(self) -> Self {
|
||||||
|
self - self.trunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
$(
|
||||||
|
#[inline]
|
||||||
|
fn $fn(self) -> Self {
|
||||||
|
// https://github.com/llvm/llvm-project/issues/83729
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
{
|
||||||
|
let mut ln = Self::splat(0f64);
|
||||||
|
for i in 0..N {
|
||||||
|
ln[i] = self[i].$fn()
|
||||||
|
}
|
||||||
|
ln
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(target_arch = "aarch64"))]
|
||||||
|
{
|
||||||
|
unsafe { intrinsics::$intrinsic(self) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<const N: usize> StdFloat for Simd<f64, N>
|
impl_float! {
|
||||||
where
|
sin: simd_fsin,
|
||||||
LaneCount<N>: SupportedLaneCount,
|
cos: simd_fcos,
|
||||||
{
|
exp: simd_fexp,
|
||||||
/// Returns the floating point's fractional value, with its integer part removed.
|
exp2: simd_fexp2,
|
||||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
ln: simd_flog,
|
||||||
#[inline]
|
log2: simd_flog2,
|
||||||
fn fract(self) -> Self {
|
log10: simd_flog10,
|
||||||
self - self.trunc()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
use simd::prelude::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn everything_works() {
|
|
||||||
let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);
|
|
||||||
let x2 = x + x;
|
|
||||||
let _xc = x.ceil();
|
|
||||||
let _xf = x.floor();
|
|
||||||
let _xr = x.round();
|
|
||||||
let _xt = x.trunc();
|
|
||||||
let _xfma = x.mul_add(x, x);
|
|
||||||
let _xsqrt = x.sqrt();
|
|
||||||
let _ = x2.abs() * x2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
74
library/portable-simd/crates/std_float/tests/float.rs
Normal file
74
library/portable-simd/crates/std_float/tests/float.rs
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
#![feature(portable_simd)]
|
||||||
|
|
||||||
|
macro_rules! unary_test {
|
||||||
|
{ $scalar:tt, $($func:tt),+ } => {
|
||||||
|
test_helpers::test_lanes! {
|
||||||
|
$(
|
||||||
|
fn $func<const LANES: usize>() {
|
||||||
|
test_helpers::test_unary_elementwise(
|
||||||
|
&core_simd::simd::Simd::<$scalar, LANES>::$func,
|
||||||
|
&$scalar::$func,
|
||||||
|
&|_| true,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! binary_test {
|
||||||
|
{ $scalar:tt, $($func:tt),+ } => {
|
||||||
|
test_helpers::test_lanes! {
|
||||||
|
$(
|
||||||
|
fn $func<const LANES: usize>() {
|
||||||
|
test_helpers::test_binary_elementwise(
|
||||||
|
&core_simd::simd::Simd::<$scalar, LANES>::$func,
|
||||||
|
&$scalar::$func,
|
||||||
|
&|_, _| true,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! ternary_test {
|
||||||
|
{ $scalar:tt, $($func:tt),+ } => {
|
||||||
|
test_helpers::test_lanes! {
|
||||||
|
$(
|
||||||
|
fn $func<const LANES: usize>() {
|
||||||
|
test_helpers::test_ternary_elementwise(
|
||||||
|
&core_simd::simd::Simd::<$scalar, LANES>::$func,
|
||||||
|
&$scalar::$func,
|
||||||
|
&|_, _, _| true,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_tests {
|
||||||
|
{ $scalar:tt } => {
|
||||||
|
mod $scalar {
|
||||||
|
use std_float::StdFloat;
|
||||||
|
|
||||||
|
unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc }
|
||||||
|
binary_test! { $scalar, log }
|
||||||
|
ternary_test! { $scalar, mul_add }
|
||||||
|
|
||||||
|
test_helpers::test_lanes! {
|
||||||
|
fn fract<const LANES: usize>() {
|
||||||
|
test_helpers::test_unary_elementwise_flush_subnormals(
|
||||||
|
&core_simd::simd::Simd::<$scalar, LANES>::fract,
|
||||||
|
&$scalar::fract,
|
||||||
|
&|_| true,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_tests! { f32 }
|
||||||
|
impl_tests! { f64 }
|
Loading…
Add table
Reference in a new issue