From 81af496d7cbe7f4ebee2186f9e458beedeb997e5 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 3 Dec 2023 16:02:42 +0100 Subject: [PATCH 01/18] fix simd_bitmask docs --- crates/core_simd/src/intrinsics.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index b27893bc729..5260de93354 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -132,9 +132,10 @@ extern "platform-intrinsic" { // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and // returns either an unsigned integer or array of `u8`. // Every element in the vector becomes a single bit in the returned bitmask. - // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits. // The bit order of the result depends on the byte endianness. LSB-first for little // endian and MSB-first for big endian. + // If the vector has less than 8 lanes, the mask lives in the least-significant bits + // (e.g., [true, false] becomes `0b01` on little endian and `0b10` on big endian). // // UB if called on a vector with values other than 0 and -1. #[allow(unused)] From 289c1d14f0dfd80d5e94141a3b9b59bed41c3539 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Dec 2023 11:27:57 -0500 Subject: [PATCH 02/18] Fix bitmask vector bit order --- crates/core_simd/src/masks/full_masks.rs | 6 ++++ crates/core_simd/tests/masks.rs | 42 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 63964f455e0..b184b98a147 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -157,6 +157,9 @@ where for x in bytes.as_mut() { *x = x.reverse_bits() } + if N % 8 > 0 { + bytes.as_mut()[N / 8] >>= 8 - N % 8; + } } bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref()); @@ -180,6 +183,9 @@ where for x in bytes.as_mut() { *x = x.reverse_bits(); } + if N % 8 > 0 { + bytes.as_mut()[N / 8] >>= 8 - N % 8; + } } // Compute the regular mask diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 00fc2a24e27..fc6a3476b7c 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -99,6 +99,19 @@ macro_rules! test_mask_api { assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask); } + #[cfg(feature = "all_lane_counts")] + #[test] + fn roundtrip_bitmask_conversion_odd() { + let values = [ + true, false, true, false, true, true, false, false, false, true, true, + ]; + let mask = Mask::<$type, 11>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b11000110101); + assert_eq!(Mask::<$type, 11>::from_bitmask(bitmask), mask); + } + + #[test] fn cast() { fn cast_impl() @@ -134,6 +147,35 @@ macro_rules! 
test_mask_api { assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]); assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask); } + + // rust-lang/portable-simd#379 + #[test] + fn roundtrip_bitmask_vector_conversion_small() { + use core_simd::simd::ToBytes; + let values = [ + true, false, true, true + ]; + let mask = Mask::<$type, 4>::from_array(values); + let bitmask = mask.to_bitmask_vector(); + assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101); + assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask); + } + + /* FIXME doesn't work with non-powers-of-two, yet + // rust-lang/portable-simd#379 + #[cfg(feature = "all_lane_counts")] + #[test] + fn roundtrip_bitmask_vector_conversion_odd() { + use core_simd::simd::ToBytes; + let values = [ + true, false, true, false, true, true, false, false, false, true, true, + ]; + let mask = Mask::<$type, 11>::from_array(values); + let bitmask = mask.to_bitmask_vector(); + assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]); + assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask); + } + */ } } } From c7057757607ab7a6fe460ac18b3f0c3ae2b4dc68 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 11 Dec 2023 12:17:10 -0800 Subject: [PATCH 03/18] Fix load/store safety comments to require aligned `T` Fixes: #382 --- crates/core_simd/src/vector.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 105c06741c5..881406d0eac 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -194,7 +194,7 @@ where /// With padding, `read_unaligned` will read past the end of an array of N elements. /// /// # Safety - /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. + /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read`. #[inline] const unsafe fn load(ptr: *const [T; N]) -> Self { // There are potentially simpler ways to write this function, but this should result in @@ -215,7 +215,7 @@ where /// See `load` as to why this function is necessary. /// /// # Safety - /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. + /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write`. #[inline] const unsafe fn store(self, ptr: *mut [T; N]) { // There are potentially simpler ways to write this function, but this should result in From b6eeb4ee90e31f7846e94e4f639f44bc5f623ca0 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 13 Dec 2023 17:46:46 -0800 Subject: [PATCH 04/18] Assume masks are correct This allows miri to detect when they are not, and may be exploited by LLVM during optimization. 
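Roughly, the idea is to assert the unsafe contract inside the unchecked
constructor itself. A minimal standalone sketch of the pattern, where `Mask8`
and `is_valid` are hypothetical stand-ins rather than portable-simd APIs:

    #![feature(core_intrinsics)]
    #![allow(internal_features)]

    struct Mask8(i8);

    fn is_valid(v: i8) -> bool {
        v == 0 || v == -1
    }

    impl Mask8 {
        /// # Safety
        /// `v` must be all-zeros (0) or all-ones (-1).
        unsafe fn from_int_unchecked(v: i8) -> Self {
            // Miri reports UB here if a caller breaks the contract, and LLVM
            // may optimize later code on the assumption that it holds.
            unsafe { core::intrinsics::assume(is_valid(v)) };
            Mask8(v)
        }
    }

    fn main() {
        let m = unsafe { Mask8::from_int_unchecked(-1) };
        assert_eq!(m.0, -1);
    }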
--- crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/masks.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 64ba9705ef5..e974e7aa25a 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -4,6 +4,7 @@ const_maybe_uninit_as_mut_ptr, const_mut_refs, convert_float_to_int, + core_intrinsics, decl_macro, inline_const, intra_doc_pointers, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 0623d2bf3d1..b95c070d09c 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -175,7 +175,10 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant - unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } + unsafe { + core::intrinsics::assume(::valid(value)); + Self(mask_impl::Mask::from_int_unchecked(value)) + } } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From bb50fa23252a82e6829308a2d771d9eb26226547 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 29 Dec 2023 18:30:53 -0500 Subject: [PATCH 05/18] Use core::intrinsics --- crates/core_simd/src/intrinsics.rs | 170 --------------------- crates/core_simd/src/lib.rs | 2 +- crates/core_simd/src/masks.rs | 19 +-- crates/core_simd/src/masks/bitmask.rs | 9 +- crates/core_simd/src/masks/full_masks.rs | 21 ++- crates/core_simd/src/mod.rs | 4 - crates/core_simd/src/ops.rs | 6 +- crates/core_simd/src/ops/unary.rs | 3 +- crates/core_simd/src/select.rs | 3 +- crates/core_simd/src/simd/cmp/eq.rs | 9 +- crates/core_simd/src/simd/cmp/ord.rs | 25 ++- crates/core_simd/src/simd/num/float.rs | 20 +-- crates/core_simd/src/simd/num/int.rs | 26 ++-- crates/core_simd/src/simd/num/uint.rs | 30 ++-- crates/core_simd/src/simd/ptr/const_ptr.rs | 14 +- crates/core_simd/src/simd/ptr/mut_ptr.rs | 14 +- crates/core_simd/src/swizzle.rs | 5 +- crates/core_simd/src/vector.rs | 11 +- 18 files changed, 106 insertions(+), 285 deletions(-) delete mode 100644 crates/core_simd/src/intrinsics.rs diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs deleted file mode 100644 index 5260de93354..00000000000 --- a/crates/core_simd/src/intrinsics.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! This module contains the LLVM intrinsics bindings that provide the functionality for this -//! crate. -//! -//! The LLVM assembly language is documented here: -//! -//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef: -//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT -//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons, -//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either. -//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note: -//! this means that division by poison or undef is like division by zero, which means it inflicts... -//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program: -//! 
LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end". -//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior, -//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract. -//! -//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory. -//! -//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths. - -// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are -// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner. -// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function. -extern "platform-intrinsic" { - /// add/fadd - pub(crate) fn simd_add(x: T, y: T) -> T; - - /// sub/fsub - pub(crate) fn simd_sub(lhs: T, rhs: T) -> T; - - /// mul/fmul - pub(crate) fn simd_mul(x: T, y: T) -> T; - - /// udiv/sdiv/fdiv - /// ints and uints: {s,u}div incur UB if division by zero occurs. - /// ints: sdiv is UB for int::MIN / -1. - /// floats: fdiv is never UB, but may create NaNs or infinities. - pub(crate) fn simd_div(lhs: T, rhs: T) -> T; - - /// urem/srem/frem - /// ints and uints: {s,u}rem incur UB if division by zero occurs. - /// ints: srem is UB for int::MIN / -1. - /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno. - pub(crate) fn simd_rem(lhs: T, rhs: T) -> T; - - /// shl - /// for (u)ints. poison if rhs >= lhs::BITS - pub(crate) fn simd_shl(lhs: T, rhs: T) -> T; - - /// ints: ashr - /// uints: lshr - /// poison if rhs >= lhs::BITS - pub(crate) fn simd_shr(lhs: T, rhs: T) -> T; - - /// and - pub(crate) fn simd_and(x: T, y: T) -> T; - - /// or - pub(crate) fn simd_or(x: T, y: T) -> T; - - /// xor - pub(crate) fn simd_xor(x: T, y: T) -> T; - - /// fptoui/fptosi/uitofp/sitofp - /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5 - /// but the truncated value must fit in the target type or the result is poison. - /// use `simd_as` instead for a cast that performs a saturating conversion. - pub(crate) fn simd_cast(x: T) -> U; - /// follows Rust's `T as U` semantics, including saturating float casts - /// which amounts to the same as `simd_cast` for many cases - pub(crate) fn simd_as(x: T) -> U; - - /// neg/fneg - /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`. - /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it. - /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`. 
- pub(crate) fn simd_neg(x: T) -> T; - - /// fabs - pub(crate) fn simd_fabs(x: T) -> T; - - // minnum/maxnum - pub(crate) fn simd_fmin(x: T, y: T) -> T; - pub(crate) fn simd_fmax(x: T, y: T) -> T; - - // these return Simd with the same BITS size as the inputs - pub(crate) fn simd_eq(x: T, y: T) -> U; - pub(crate) fn simd_ne(x: T, y: T) -> U; - pub(crate) fn simd_lt(x: T, y: T) -> U; - pub(crate) fn simd_le(x: T, y: T) -> U; - pub(crate) fn simd_gt(x: T, y: T) -> U; - pub(crate) fn simd_ge(x: T, y: T) -> U; - - // shufflevector - // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s - pub(crate) fn simd_shuffle(x: T, y: T, idx: U) -> V; - - /// llvm.masked.gather - /// like a loop of pointer reads - /// val: vector of values to select if a lane is masked - /// ptr: vector of pointers to read from - /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val) - /// note, the LLVM intrinsic accepts a mask vector of `` - /// FIXME: review this if/when we fix up our mask story in general? - pub(crate) fn simd_gather(val: T, ptr: U, mask: V) -> T; - /// llvm.masked.scatter - /// like gather, but more spicy, as it writes instead of reads - pub(crate) fn simd_scatter(val: T, ptr: U, mask: V); - - // {s,u}add.sat - pub(crate) fn simd_saturating_add(x: T, y: T) -> T; - - // {s,u}sub.sat - pub(crate) fn simd_saturating_sub(lhs: T, rhs: T) -> T; - - // reductions - // llvm.vector.reduce.{add,fadd} - pub(crate) fn simd_reduce_add_ordered(x: T, y: U) -> U; - // llvm.vector.reduce.{mul,fmul} - pub(crate) fn simd_reduce_mul_ordered(x: T, y: U) -> U; - #[allow(unused)] - pub(crate) fn simd_reduce_all(x: T) -> bool; - #[allow(unused)] - pub(crate) fn simd_reduce_any(x: T) -> bool; - pub(crate) fn simd_reduce_max(x: T) -> U; - pub(crate) fn simd_reduce_min(x: T) -> U; - pub(crate) fn simd_reduce_and(x: T) -> U; - pub(crate) fn simd_reduce_or(x: T) -> U; - pub(crate) fn simd_reduce_xor(x: T) -> U; - - // truncate integer vector to bitmask - // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and - // returns either an unsigned integer or array of `u8`. - // Every element in the vector becomes a single bit in the returned bitmask. - // The bit order of the result depends on the byte endianness. LSB-first for little - // endian and MSB-first for big endian. - // If the vector has less than 8 lanes, the mask lives in the least-significant bits - // (e.g., [true, false] becomes `0b01` on little endian and `0b10` on big endian). - // - // UB if called on a vector with values other than 0 and -1. - #[allow(unused)] - pub(crate) fn simd_bitmask(x: T) -> U; - - // select - // first argument is a vector of integers, -1 (all bits 1) is "true" - // logically equivalent to (yes & m) | (no & (m^-1), - // but you can use it on floats. 
- pub(crate) fn simd_select(m: M, yes: T, no: T) -> T; - #[allow(unused)] - pub(crate) fn simd_select_bitmask(m: M, yes: T, no: T) -> T; - - /// getelementptr (without inbounds) - /// equivalent to wrapping_offset - pub(crate) fn simd_arith_offset(ptr: T, offset: U) -> T; - - /// equivalent to `T as U` semantics, specifically for pointers - pub(crate) fn simd_cast_ptr(ptr: T) -> U; - - /// expose a pointer as an address - pub(crate) fn simd_expose_addr(ptr: T) -> U; - - /// convert an exposed address back to a pointer - pub(crate) fn simd_from_exposed_addr(addr: T) -> U; - - // Integer operations - pub(crate) fn simd_bswap(x: T) -> T; - pub(crate) fn simd_bitreverse(x: T) -> T; - pub(crate) fn simd_ctlz(x: T) -> T; - pub(crate) fn simd_cttz(x: T) -> T; -} diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 64ba9705ef5..faec64c6344 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,5 +1,6 @@ #![no_std] #![feature( + core_intrinsics, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, const_mut_refs, @@ -7,7 +8,6 @@ decl_macro, inline_const, intra_doc_pointers, - platform_intrinsics, repr_simd, simd_ffi, staged_api, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 0623d2bf3d1..32fd9acbaea 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,9 +12,7 @@ )] mod mask_impl; -use crate::simd::{ - cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, -}; +use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; use core::cmp::Ordering; use core::{fmt, mem}; @@ -141,8 +139,9 @@ where // but these are "dependently-sized" types, so copy elision it is! unsafe { let bytes: [u8; N] = mem::transmute_copy(&array); - let bools: Simd = intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); - Mask::from_int_unchecked(intrinsics::simd_cast(bools)) + let bools: Simd = + core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); + Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools)) } } @@ -160,7 +159,7 @@ where // This would be hypothetically valid as an "in-place" transmute, // but these are "dependently-sized" types, so copy elision it is! 
unsafe { - let mut bytes: Simd = intrinsics::simd_cast(self.to_int()); + let mut bytes: Simd = core::intrinsics::simd::simd_cast(self.to_int()); bytes &= Simd::splat(1i8); mem::transmute_copy(&bytes) } @@ -374,15 +373,17 @@ where ); // Safety: the input and output are integer vectors - let index: Simd = unsafe { intrinsics::simd_cast(index) }; + let index: Simd = unsafe { core::intrinsics::simd::simd_cast(index) }; let masked_index = self.select(index, Self::splat(true).to_int()); // Safety: the input and output are integer vectors - let masked_index: Simd = unsafe { intrinsics::simd_cast(masked_index) }; + let masked_index: Simd = + unsafe { core::intrinsics::simd::simd_cast(masked_index) }; // Safety: the input is an integer vector - let min_index: T::Unsigned = unsafe { intrinsics::simd_reduce_min(masked_index) }; + let min_index: T::Unsigned = + unsafe { core::intrinsics::simd::simd_reduce_min(masked_index) }; // Safety: the return value is the unsigned version of T let min_index: T = unsafe { core::mem::transmute_copy(&min_index) }; diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 6ddff07fea2..96c553426ee 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,6 +1,5 @@ #![allow(unused_imports)] use super::MaskElement; -use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SupportedLaneCount}; use core::marker::PhantomData; @@ -109,14 +108,18 @@ where #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_int(self) -> Simd { unsafe { - intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE)) + core::intrinsics::simd::simd_select_bitmask( + self.0, + Simd::splat(T::TRUE), + Simd::splat(T::FALSE), + ) } } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { - unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } + unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) } } #[inline] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 63964f455e0..333e449e438 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -1,6 +1,5 @@ //! Masks that take up full SIMD vector registers. -use crate::simd::intrinsics; use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount}; #[repr(transparent)] @@ -138,7 +137,7 @@ where U: MaskElement, { // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type. 
- unsafe { Mask(intrinsics::simd_cast(self.0)) } + unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) } } #[inline] @@ -150,7 +149,7 @@ where unsafe { // Compute the bitmask let mut bytes: as SupportedLaneCount>::BitMask = - intrinsics::simd_bitmask(self.0); + core::intrinsics::simd::simd_bitmask(self.0); // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { @@ -183,7 +182,7 @@ where } // Compute the regular mask - Self::from_int_unchecked(intrinsics::simd_select_bitmask( + Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask( bytes, Self::splat(true).to_int(), Self::splat(false).to_int(), @@ -199,7 +198,7 @@ where let resized = self.to_int().resize::(T::FALSE); // Safety: `resized` is an integer vector with length M, which must match T - let bitmask: U = unsafe { intrinsics::simd_bitmask(resized) }; + let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) }; // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { @@ -223,7 +222,7 @@ where // SAFETY: `mask` is the correct bitmask type for a u64 bitmask let mask: Simd = unsafe { - intrinsics::simd_select_bitmask( + core::intrinsics::simd::simd_select_bitmask( bitmask, Simd::::splat(T::TRUE), Simd::::splat(T::FALSE), @@ -274,14 +273,14 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { // Safety: use `self` as an integer vector - unsafe { intrinsics::simd_reduce_any(self.to_int()) } + unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) } } #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] pub fn all(self) -> bool { // Safety: use `self` as an integer vector - unsafe { intrinsics::simd_reduce_all(self.to_int()) } + unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) } } } @@ -306,7 +305,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: Self) -> Self { // Safety: `self` is an integer vector - unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) } + unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) } } } @@ -320,7 +319,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: Self) -> Self { // Safety: `self` is an integer vector - unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) } + unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) } } } @@ -334,7 +333,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: Self) -> Self { // Safety: `self` is an integer vector - unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) } + unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) } } } diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index fd016f1c6f7..45b1a0f9751 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -1,8 +1,6 @@ #[macro_use] mod swizzle; -pub(crate) mod intrinsics; - mod alias; mod cast; mod fmt; @@ -27,8 +25,6 @@ pub mod simd { pub mod cmp; - pub(crate) use crate::core_simd::intrinsics; - pub use crate::core_simd::alias::*; pub use crate::core_simd::cast::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 8a1b083f039..d8e10eeaa1a 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -37,7 +37,7 @@ where macro_rules! 
unsafe_base { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => { // Safety: $lhs and $rhs are vectors - unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } + unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) } }; } @@ -55,7 +55,7 @@ macro_rules! wrap_bitshift { #[allow(clippy::suspicious_arithmetic_impl)] // Safety: $lhs and the bitand result are vectors unsafe { - $crate::simd::intrinsics::$simd_call( + core::intrinsics::simd::$simd_call( $lhs, $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)), ) @@ -97,7 +97,7 @@ macro_rules! int_divrem_guard { $rhs }; // Safety: $lhs and rhs are vectors - unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) } + unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) } } }; } diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index a651aa73e95..bdae96332a3 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -1,4 +1,3 @@ -use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::ops::{Neg, Not}; // unary ops @@ -15,7 +14,7 @@ macro_rules! neg { #[must_use = "operator returns a new vector without mutating the input"] fn neg(self) -> Self::Output { // Safety: `self` is a signed vector - unsafe { intrinsics::simd_neg(self) } + unsafe { core::intrinsics::simd::simd_neg(self) } } })* } diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index cdcf8eeec81..f33aa261a92 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -1,4 +1,3 @@ -use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; impl Mask @@ -29,7 +28,7 @@ where { // Safety: The mask has been cast to a vector of integers, // and the operands to select between are vectors of the same type and length. - unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) } + unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) } } /// Choose elements from two masks. diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index f132fa2cc0c..5b4615ce51d 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -1,5 +1,4 @@ use crate::simd::{ - intrinsics, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, }; @@ -31,14 +30,14 @@ macro_rules! impl_number { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) } } } )* @@ -60,14 +59,14 @@ macro_rules! impl_mask { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. 
- unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) } } } )* diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index 4e9d49ea221..899f00a8316 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -1,6 +1,5 @@ use crate::simd::{ cmp::SimdPartialEq, - intrinsics, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, Simd, SupportedLaneCount, }; @@ -57,28 +56,28 @@ macro_rules! impl_integer { fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } @@ -123,28 +122,28 @@ macro_rules! impl_float { fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } )* @@ -164,28 +163,28 @@ macro_rules! 
impl_mask { fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) } } } diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index fc0b99e87a6..59e43851ea8 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -1,7 +1,7 @@ use super::sealed::Sealed; use crate::simd::{ cmp::{SimdPartialEq, SimdPartialOrd}, - intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, + LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; /// Operations on SIMD vectors of floats. @@ -259,7 +259,7 @@ macro_rules! impl_trait { fn cast(self) -> Self::Cast { // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } + unsafe { core::intrinsics::simd::simd_as(self) } } #[inline] @@ -269,7 +269,7 @@ macro_rules! impl_trait { Self::Scalar: core::convert::FloatToInt, { // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants - unsafe { intrinsics::simd_cast(self) } + unsafe { core::intrinsics::simd::simd_cast(self) } } #[inline] @@ -289,7 +289,7 @@ macro_rules! impl_trait { #[inline] fn abs(self) -> Self { // Safety: `self` is a float vector - unsafe { intrinsics::simd_fabs(self) } + unsafe { core::intrinsics::simd::simd_fabs(self) } } #[inline] @@ -363,13 +363,13 @@ macro_rules! impl_trait { #[inline] fn simd_min(self, other: Self) -> Self { // Safety: `self` and `other` are float vectors - unsafe { intrinsics::simd_fmin(self, other) } + unsafe { core::intrinsics::simd::simd_fmin(self, other) } } #[inline] fn simd_max(self, other: Self) -> Self { // Safety: `self` and `other` are floating point vectors - unsafe { intrinsics::simd_fmax(self, other) } + unsafe { core::intrinsics::simd::simd_fmax(self, other) } } #[inline] @@ -391,7 +391,7 @@ macro_rules! impl_trait { self.as_array().iter().sum() } else { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) } } } @@ -402,20 +402,20 @@ macro_rules! 
impl_trait { self.as_array().iter().product() } else { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) } + unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1.) } } } #[inline] fn reduce_max(self) -> Self::Scalar { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_max(self) } + unsafe { core::intrinsics::simd::simd_reduce_max(self) } } #[inline] fn reduce_min(self) -> Self::Scalar { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_min(self) } + unsafe { core::intrinsics::simd::simd_reduce_min(self) } } } )* diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs index 1f1aa272782..d7598d9ceaf 100644 --- a/crates/core_simd/src/simd/num/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - cmp::SimdPartialOrd, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, + cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; @@ -237,19 +237,19 @@ macro_rules! impl_trait { #[inline] fn cast(self) -> Self::Cast { // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } + unsafe { core::intrinsics::simd::simd_as(self) } } #[inline] fn saturating_add(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_add(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_add(self, second) } } #[inline] fn saturating_sub(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_sub(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) } } #[inline] @@ -293,55 +293,55 @@ macro_rules! 
impl_trait { #[inline] fn reduce_sum(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) } } #[inline] fn reduce_product(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1) } } #[inline] fn reduce_max(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_max(self) } + unsafe { core::intrinsics::simd::simd_reduce_max(self) } } #[inline] fn reduce_min(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_min(self) } + unsafe { core::intrinsics::simd::simd_reduce_min(self) } } #[inline] fn reduce_and(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_and(self) } + unsafe { core::intrinsics::simd::simd_reduce_and(self) } } #[inline] fn reduce_or(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_or(self) } + unsafe { core::intrinsics::simd::simd_reduce_or(self) } } #[inline] fn reduce_xor(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_xor(self) } + unsafe { core::intrinsics::simd::simd_reduce_xor(self) } } #[inline] fn swap_bytes(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bswap(self) } + unsafe { core::intrinsics::simd::simd_bswap(self) } } #[inline] fn reverse_bits(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bitreverse(self) } + unsafe { core::intrinsics::simd::simd_bitreverse(self) } } #[inline] diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs index c955ee8fe8b..53dd97f501c 100644 --- a/crates/core_simd/src/simd/num/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; /// Operations on SIMD vectors of unsigned integers. pub trait SimdUint: Copy + Sealed { @@ -117,7 +117,7 @@ macro_rules! impl_trait { #[inline] fn cast(self) -> Self::Cast { // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } + unsafe { core::intrinsics::simd::simd_as(self) } } #[inline] @@ -129,79 +129,79 @@ macro_rules! 
impl_trait { #[inline] fn saturating_add(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_add(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_add(self, second) } } #[inline] fn saturating_sub(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_sub(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) } } #[inline] fn reduce_sum(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) } } #[inline] fn reduce_product(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1) } } #[inline] fn reduce_max(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_max(self) } + unsafe { core::intrinsics::simd::simd_reduce_max(self) } } #[inline] fn reduce_min(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_min(self) } + unsafe { core::intrinsics::simd::simd_reduce_min(self) } } #[inline] fn reduce_and(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_and(self) } + unsafe { core::intrinsics::simd::simd_reduce_and(self) } } #[inline] fn reduce_or(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_or(self) } + unsafe { core::intrinsics::simd::simd_reduce_or(self) } } #[inline] fn reduce_xor(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_xor(self) } + unsafe { core::intrinsics::simd::simd_reduce_xor(self) } } #[inline] fn swap_bytes(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bswap(self) } + unsafe { core::intrinsics::simd::simd_bswap(self) } } #[inline] fn reverse_bits(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bitreverse(self) } + unsafe { core::intrinsics::simd::simd_bitreverse(self) } } #[inline] fn leading_zeros(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_ctlz(self) } + unsafe { core::intrinsics::simd::simd_ctlz(self) } } #[inline] fn trailing_zeros(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_cttz(self) } + unsafe { core::intrinsics::simd::simd_cttz(self) } } #[inline] diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index 97fe3fb600d..e217d1c8c87 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -1,7 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{ - cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount, -}; +use crate::simd::{cmp::SimdPartialEq, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount}; /// Operations on SIMD vectors of constant pointers. 
pub trait SimdConstPtr: Copy + Sealed { @@ -103,13 +101,13 @@ where assert_eq!(size_of::<::Metadata>(), 0); // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] fn cast_mut(self) -> Self::MutPtr { // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] @@ -135,19 +133,19 @@ where #[inline] fn expose_addr(self) -> Self::Usize { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_expose_addr(self) } + unsafe { core::intrinsics::simd::simd_expose_addr(self) } } #[inline] fn from_exposed_addr(addr: Self::Usize) -> Self { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_from_exposed_addr(addr) } + unsafe { core::intrinsics::simd::simd_from_exposed_addr(addr) } } #[inline] fn wrapping_offset(self, count: Self::Isize) -> Self { // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets - unsafe { intrinsics::simd_arith_offset(self, count) } + unsafe { core::intrinsics::simd::simd_arith_offset(self, count) } } #[inline] diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index e35633d0433..5cb27af4fde 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -1,7 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{ - cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount, -}; +use crate::simd::{cmp::SimdPartialEq, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { @@ -100,13 +98,13 @@ where assert_eq!(size_of::<::Metadata>(), 0); // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] fn cast_const(self) -> Self::ConstPtr { // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] @@ -132,19 +130,19 @@ where #[inline] fn expose_addr(self) -> Self::Usize { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_expose_addr(self) } + unsafe { core::intrinsics::simd::simd_expose_addr(self) } } #[inline] fn from_exposed_addr(addr: Self::Usize) -> Self { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_from_exposed_addr(addr) } + unsafe { core::intrinsics::simd::simd_from_exposed_addr(addr) } } #[inline] fn wrapping_offset(self, count: Self::Isize) -> Self { // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets - unsafe { intrinsics::simd_arith_offset(self, count) } + unsafe { core::intrinsics::simd::simd_arith_offset(self, count) } } #[inline] diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index ec8548d5574..71110bb2820 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -1,4 +1,3 @@ -use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; /// Constructs a new SIMD vector by copying elements from selected elements in other vectors. @@ -88,7 +87,7 @@ pub trait Swizzle { { // Safety: `vector` is a vector, and the index is a const array of u32. 
unsafe { - intrinsics::simd_shuffle( + core::intrinsics::simd::simd_shuffle( vector, vector, const { @@ -124,7 +123,7 @@ pub trait Swizzle { { // Safety: `first` and `second` are vectors, and the index is a const array of u32. unsafe { - intrinsics::simd_shuffle( + core::intrinsics::simd::simd_shuffle( first, second, const { diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 881406d0eac..9e97a3161bb 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,6 +1,5 @@ use crate::simd::{ cmp::SimdPartialOrd, - intrinsics, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; @@ -491,7 +490,7 @@ where or: Self, ) -> Self { // Safety: The caller is responsible for upholding all invariants - unsafe { intrinsics::simd_gather(or, source, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } } /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. @@ -650,7 +649,7 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask) { // Safety: The caller is responsible for upholding all invariants - unsafe { intrinsics::simd_scatter(self, dest, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) } } } @@ -692,7 +691,8 @@ where fn eq(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. let mask = unsafe { - let tfvec: Simd<::Mask, N> = intrinsics::simd_eq(*self, *other); + let tfvec: Simd<::Mask, N> = + core::intrinsics::simd::simd_eq(*self, *other); Mask::from_int_unchecked(tfvec) }; @@ -705,7 +705,8 @@ where fn ne(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. 
let mask = unsafe { - let tfvec: Simd<::Mask, N> = intrinsics::simd_ne(*self, *other); + let tfvec: Simd<::Mask, N> = + core::intrinsics::simd::simd_ne(*self, *other); Mask::from_int_unchecked(tfvec) }; From e7130ec093a761ee61e22a784ffac32aa1c49d65 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 5 Jan 2024 17:59:47 -0500 Subject: [PATCH 06/18] Add exposed_provenance for rust-lang/rust#118487 --- crates/core_simd/tests/pointers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index a90ff928ced..b9f32d16e01 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -1,4 +1,4 @@ -#![feature(portable_simd, strict_provenance)] +#![feature(portable_simd, strict_provenance, exposed_provenance)] use core_simd::simd::{ ptr::{SimdConstPtr, SimdMutPtr}, From 4e36929e1728a62d47b32946c0d236d9395f9dba Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 9 Feb 2024 21:56:19 +0100 Subject: [PATCH 07/18] re-add const_intrinsic_copy feature --- crates/core_simd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index faec64c6344..9180e5c7dfd 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,7 @@ #![no_std] #![feature( core_intrinsics, + const_intrinsic_copy, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, const_mut_refs, From 047ba0a280a5d23f52c717af053b3171b0f78de8 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 9 Feb 2024 22:04:03 +0100 Subject: [PATCH 08/18] stdsimd feature got split up --- crates/core_simd/src/lib.rs | 19 ++++++++++++++++++- crates/test_helpers/src/lib.rs | 6 +++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 9180e5c7dfd..dfb36d8c513 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -12,10 +12,27 @@ repr_simd, simd_ffi, staged_api, - stdsimd, strict_provenance, ptr_metadata )] +#![cfg_attr( + all( + any(target_arch = "aarch64", target_arch = "arm",), + any( + all(target_feature = "v6", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), + ) + ), + feature(stdarch_arm_dsp) +)] +#![cfg_attr( + all(target_arch = "arm", target_feature = "v7"), + feature(stdarch_arm_neon_intrinsics) +)] +#![cfg_attr( + any(target_arch = "powerpc", target_arch = "powerpc64"), + feature(stdarch_powerpc) +)] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] #![allow(internal_features)] diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index b80c745aaf2..51b860a8635 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -1,4 +1,8 @@ -#![feature(stdsimd, powerpc_target_feature)] +#![feature(powerpc_target_feature)] +#![cfg_attr( + any(target_arch = "powerpc", target_arch = "powerpc64"), + feature(stdarch_powerpc) +)] pub mod array; From 851ef63576dc4d598e5690afe1ee4e066cda306a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 14 Feb 2024 16:26:03 +0100 Subject: [PATCH 09/18] use core::intrinsics::simd --- crates/std_float/src/lib.rs | 38 +++++++++---------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 1fef17242ca..4c547777fde 100644 --- 
a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(feature = "as_crate", no_std)] // We are std! #![cfg_attr( feature = "as_crate", - feature(platform_intrinsics), + feature(core_intrinsics), feature(portable_simd), allow(internal_features) )] @@ -10,6 +10,8 @@ use core::simd; #[cfg(feature = "as_crate")] use core_simd::simd; +use core::intrinsics::simd as intrinsics; + use simd::{LaneCount, Simd, SupportedLaneCount}; #[cfg(feature = "as_crate")] @@ -22,28 +24,6 @@ use experimental as sealed; use crate::sealed::Sealed; -// "platform intrinsics" are essentially "codegen intrinsics" -// each of these may be scalarized and lowered to a libm call -extern "platform-intrinsic" { - // ceil - fn simd_ceil(x: T) -> T; - - // floor - fn simd_floor(x: T) -> T; - - // round - fn simd_round(x: T) -> T; - - // trunc - fn simd_trunc(x: T) -> T; - - // fsqrt - fn simd_fsqrt(x: T) -> T; - - // fma - fn simd_fma(x: T, y: T, z: T) -> T; -} - /// This trait provides a possibly-temporary implementation of float functions /// that may, in the absence of hardware support, canonicalize to calling an /// operating system's `math.h` dynamically-loaded library (also known as a @@ -74,7 +54,7 @@ pub trait StdFloat: Sealed + Sized { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn mul_add(self, a: Self, b: Self) -> Self { - unsafe { simd_fma(self, a, b) } + unsafe { intrinsics::simd_fma(self, a, b) } } /// Produces a vector where every lane has the square root value @@ -82,35 +62,35 @@ pub trait StdFloat: Sealed + Sized { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn sqrt(self) -> Self { - unsafe { simd_fsqrt(self) } + unsafe { intrinsics::simd_fsqrt(self) } } /// Returns the smallest integer greater than or equal to each lane. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn ceil(self) -> Self { - unsafe { simd_ceil(self) } + unsafe { intrinsics::simd_ceil(self) } } /// Returns the largest integer value less than or equal to each lane. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn floor(self) -> Self { - unsafe { simd_floor(self) } + unsafe { intrinsics::simd_floor(self) } } /// Rounds to the nearest integer value. Ties round toward zero. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn round(self) -> Self { - unsafe { simd_round(self) } + unsafe { intrinsics::simd_round(self) } } /// Returns the floating point's integer value, with its fractional part removed. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn trunc(self) -> Self { - unsafe { simd_trunc(self) } + unsafe { intrinsics::simd_trunc(self) } } /// Returns the floating point's fractional value, with its integer part removed. 
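The methods above belong to the nightly `StdFloat` extension trait; a minimal
usage sketch, assuming a nightly toolchain with the `portable_simd` feature
enabled:

    #![feature(portable_simd)]
    use std::simd::{f32x4, StdFloat};

    fn main() {
        let v = f32x4::from_array([1.0, 4.0, 9.0, 16.0]);
        // sqrt and mul_add lower to the simd_fsqrt / simd_fma intrinsics.
        assert_eq!(v.sqrt().to_array(), [1.0, 2.0, 3.0, 4.0]);
        assert_eq!(v.mul_add(v, v).to_array(), [2.0, 20.0, 90.0, 272.0]);
        // trunc drops the fractional part of each lane, like f32::trunc.
        let w = f32x4::from_array([1.2, -1.2, 2.5, -2.5]);
        assert_eq!(w.trunc().to_array(), [1.0, -1.0, 2.0, -2.0]);
    }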
From a2dd4a3b5be7356c7c72387346e4299093964ea6 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 10:49:13 -0500 Subject: [PATCH 10/18] Minor fixes --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/src/masks.rs | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 4cad148f224..a25723e11ce 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,5 @@ #![no_std] #![feature( - core_intrinsics, const_intrinsic_copy, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 4389079adb3..ad7c68fc7f6 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -33,7 +33,7 @@ mod sealed { fn eq(self, other: Self) -> bool; - fn as_usize(self) -> usize; + fn to_usize(self) -> usize; type Unsigned: SimdElement; @@ -65,7 +65,7 @@ macro_rules! impl_element { fn eq(self, other: Self) -> bool { self == other } #[inline] - fn as_usize(self) -> usize { + fn to_usize(self) -> usize { self as usize } @@ -394,7 +394,7 @@ where if min_index.eq(T::TRUE) { None } else { - Some(min_index.as_usize()) + Some(min_index.to_usize()) } } } From 644bdfb2ac810c788300bfd79fcc0e0ec84cda5f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:08:08 -0500 Subject: [PATCH 11/18] Revert "Merge pull request #385 from workingjubilee/make-an-ass-out-of-u-and-me" This reverts commit 6ad779c3f6f8e935c12f5a9e488aeeaf0d829ff1, reversing changes made to b2e1bcba2c9febb16561420392be2ca483fcad7f. --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/src/masks.rs | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index a25723e11ce..6c66142f0c8 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -5,7 +5,6 @@ const_maybe_uninit_as_mut_ptr, const_mut_refs, convert_float_to_int, - core_intrinsics, decl_macro, inline_const, intra_doc_pointers, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ad7c68fc7f6..aad91d7acb7 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -174,10 +174,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant - unsafe { - core::intrinsics::assume(::valid(value)); - Self(mask_impl::Mask::from_int_unchecked(value)) - } + unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From d8439e90bd6c86fe4368807f5d1fc9952a36aeca Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:11:58 -0500 Subject: [PATCH 12/18] Add back core_intrinsics --- crates/core_simd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 6c66142f0c8..a25723e11ce 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -5,6 +5,7 @@ const_maybe_uninit_as_mut_ptr, const_mut_refs, convert_float_to_int, + core_intrinsics, decl_macro, inline_const, intra_doc_pointers, From 44b4d26273e6d18dcd6bbadd66e793c229e89a59 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:18:31 -0500 Subject: [PATCH 13/18] Revert "Revert "Merge pull request #385 from workingjubilee/make-an-ass-out-of-u-and-me"" This reverts 
commit 644bdfb2ac810c788300bfd79fcc0e0ec84cda5f. --- crates/core_simd/src/masks.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index aad91d7acb7..ad7c68fc7f6 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -174,7 +174,10 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant - unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } + unsafe { + core::intrinsics::assume(::valid(value)); + Self(mask_impl::Mask::from_int_unchecked(value)) + } } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From aebf6f156056ed803afed2ad055094d2ff4fc0cb Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:33:13 -0500 Subject: [PATCH 14/18] Use intrinsics directly to avoid recursion --- crates/core_simd/src/masks.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ad7c68fc7f6..e480c25a51e 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,7 +12,7 @@ )] mod mask_impl; -use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; use core::cmp::Ordering; use core::{fmt, mem}; @@ -58,7 +58,16 @@ macro_rules! impl_element { where LaneCount: SupportedLaneCount, { - (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all() + // We can't use `Simd` directly, because `Simd`'s functions call this function and + // we will end up with an infinite loop. + // Safety: `value` is an integer vector + unsafe { + use core::intrinsics::simd; + let falses: Simd = simd::simd_eq(value, Simd::splat(0 as _)); + let trues: Simd = simd::simd_eq(value, Simd::splat(-1 as _)); + let valid: Simd = simd::simd_or(falses, trues); + simd::simd_reduce_all(valid) + } } #[inline] From afa70e6d92b90b54f1c31ca9838fdeb4c7033db7 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 2 Dec 2023 10:49:21 -0500 Subject: [PATCH 15/18] Remove link to core::arch::x86_64 --- crates/core_simd/src/core_simd_docs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md index fa93155ff5e..bf412e035b5 100644 --- a/crates/core_simd/src/core_simd_docs.md +++ b/crates/core_simd/src/core_simd_docs.md @@ -30,7 +30,7 @@ Instead, they map to a reasonable implementation of the operation for the target Consistency between targets is not compromised to use faster or fewer instructions. In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent. -For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`]. +For example, `_mm_min_ps`[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`]. When necessary, [`Simd`] can be converted to the types provided by `std::arch` to make use of target-specific functions. 
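The behavioral difference mentioned in that doc paragraph is mostly about NaN
handling; a small sketch of the IEEE-style `simd_min` semantics, assuming a
nightly toolchain with the `portable_simd` feature enabled:

    #![feature(portable_simd)]
    use std::simd::{f32x4, num::SimdFloat};

    fn main() {
        let a = f32x4::from_array([f32::NAN, 0.0, 2.0, -1.0]);
        let b = f32x4::from_array([1.0, f32::NAN, 1.0, -2.0]);
        // Like f32::min, simd_min returns the non-NaN operand when exactly one
        // input is NaN; _mm_min_ps effectively computes `a < b ? a : b`, so an
        // unordered lane yields its second operand instead.
        assert_eq!(a.simd_min(b).to_array(), [1.0, 0.0, 1.0, -2.0]);
    }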
Many targets simply don't have SIMD, or don't support SIMD for a particular element type. From 64bd26f221e5f2ce198899635999d043f4dfc241 Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Dec 2023 23:26:45 +0100 Subject: [PATCH 16/18] Fix target_feature config in portable-simd --- crates/core_simd/src/swizzle_dyn.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index bd8a38e350d..dac013cc98d 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -55,7 +55,7 @@ where 16 => transize(vqtbl1q_u8, self, idxs), #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] 32 => transize_raw(avx2_pshufb, self, idxs), - #[cfg(target_feature = "avx512vl,avx512vbmi")] + #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))] 32 => transize(x86::_mm256_permutexvar_epi8, self, idxs), // Notable absence: avx512bw shuffle // If avx512bw is available, odds of avx512vbmi are good From 9aec60e2c63c3c67649589a77a4e53de45cc565b Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 30 Jan 2024 03:40:53 +0000 Subject: [PATCH 17/18] Disable conversions between portable_simd and stdarch on big-endian ARM stdarch no longer provide SIMD on big-endian ARM due to https://github.com/rust-lang/stdarch/issues/1484 --- crates/core_simd/src/vendor/arm.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/vendor/arm.rs b/crates/core_simd/src/vendor/arm.rs index ff3b69ccf95..ee5c6421373 100644 --- a/crates/core_simd/src/vendor/arm.rs +++ b/crates/core_simd/src/vendor/arm.rs @@ -7,9 +7,12 @@ use core::arch::arm::*; #[cfg(target_arch = "aarch64")] use core::arch::aarch64::*; -#[cfg(any( - target_arch = "aarch64", - all(target_arch = "arm", target_feature = "v7"), +#[cfg(all( + any( + target_arch = "aarch64", + all(target_arch = "arm", target_feature = "v7"), + ), + target_endian = "little" ))] mod neon { use super::*; From eee4f1d83a29345dcd542a53d729c04bc53e656d Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 19:10:17 -0500 Subject: [PATCH 18/18] Fix swizzle_dyn --- crates/core_simd/src/swizzle_dyn.rs | 44 ++++++++--------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index dac013cc98d..ae9ff6894b0 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -44,7 +44,7 @@ where ))] 8 => transize(vtbl1_u8, self, idxs), #[cfg(target_feature = "ssse3")] - 16 => transize(x86::_mm_shuffle_epi8, self, idxs), + 16 => transize(x86::_mm_shuffle_epi8, self, zeroing_idxs(idxs)), #[cfg(target_feature = "simd128")] 16 => transize(wasm::i8x16_swizzle, self, idxs), #[cfg(all( @@ -54,9 +54,9 @@ where ))] 16 => transize(vqtbl1q_u8, self, idxs), #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] - 32 => transize_raw(avx2_pshufb, self, idxs), + 32 => transize(avx2_pshufb, self, idxs), #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))] - 32 => transize(x86::_mm256_permutexvar_epi8, self, idxs), + 32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self), // Notable absence: avx512bw shuffle // If avx512bw is available, odds of avx512vbmi are good // FIXME: initial AVX512VBMI variant didn't actually pass muster @@ -129,45 +129,25 @@ unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { #[inline(always)] unsafe fn transize( f: 
unsafe fn(T, T) -> T, - bytes: Simd, - idxs: Simd, + a: Simd, + b: Simd, ) -> Simd where LaneCount: SupportedLaneCount, { - let idxs = zeroing_idxs(idxs); // SAFETY: Same obligation to use this function as to use mem::transmute_copy. - unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) } + unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) } } -/// Make indices that yield 0 for this architecture +/// Make indices that yield 0 for x86 +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[allow(unused)] #[inline(always)] fn zeroing_idxs(idxs: Simd) -> Simd where LaneCount: SupportedLaneCount, { - // On x86, make sure the top bit is set. - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - let idxs = { - use crate::simd::cmp::SimdPartialOrd; - idxs.simd_lt(Simd::splat(N as u8)) - .select(idxs, Simd::splat(u8::MAX)) - }; - // Simply do nothing on most architectures. - idxs -} - -/// As transize but no implicit call to `zeroing_idxs`. -#[allow(dead_code)] -#[inline(always)] -unsafe fn transize_raw( - f: unsafe fn(T, T) -> T, - bytes: Simd, - idxs: Simd, -) -> Simd -where - LaneCount: SupportedLaneCount, -{ - // SAFETY: Same obligation to use this function as to use mem::transmute_copy. - unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) } + use crate::simd::cmp::SimdPartialOrd; + idxs.simd_lt(Simd::splat(N as u8)) + .select(idxs, Simd::splat(u8::MAX)) }
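The thread running through this last patch is the portable contract of `swizzle_dyn`: an in-range index selects the corresponding byte, and any out-of-range index must yield 0 no matter which backend (`pshufb`, `vqtbl1q_u8`, the scalar fallback, ...) services the call, which is why the x86 paths remap indices through `zeroing_idxs` first. A minimal sketch of that contract, assuming only a nightly toolchain with the `portable_simd` feature:

```rust
#![feature(portable_simd)]
// Sketch only: `swizzle_dyn` is provided for `Simd<u8, N>`, and out-of-range
// indices are documented to select 0.
use std::simd::Simd;

fn main() {
    let table = Simd::<u8, 16>::from_array(*b"abcdefghijklmnop");
    // 3, 0, 1, 15 and 2 are in range; 0xFF and 16 are not and must map to 0.
    let idxs = Simd::from_array([3, 0, 0xFF, 1, 15, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0]);
    let shuffled = table.swizzle_dyn(idxs);
    assert_eq!(
        shuffled.to_array()[..8],
        [b'd', b'a', 0, b'b', b'p', 0, b'c', b'c']
    );
    println!("{shuffled:?}");
}
```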
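Several of the earlier patches circle around the validity invariant that `from_int_unchecked` now `assume`s. A small hedged sketch of the two constructors' contracts (again assuming nightly with `portable_simd`): the safe `from_int` checks the all-zeros/all-ones invariant, while the unsafe variant makes the caller responsible for it, which is exactly what the added `assume` lets Miri and LLVM rely on.

```rust
#![feature(portable_simd)]
use std::simd::{Mask, Simd};

fn main() {
    let ints = Simd::from_array([0i32, -1, -1, 0]);

    // Safe constructor: asserts that every lane is 0 or -1 (panics otherwise).
    let mask = Mask::<i32, 4>::from_int(ints);
    assert_eq!(mask.to_array(), [false, true, true, false]);

    // Unsafe constructor: the caller promises the invariant; violating it is
    // undefined behavior, which the `assume` now makes detectable under Miri.
    let mask2 = unsafe { Mask::<i32, 4>::from_int_unchecked(ints) };
    assert_eq!(mask, mask2);
}
```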