From 81af496d7cbe7f4ebee2186f9e458beedeb997e5 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 3 Dec 2023 16:02:42 +0100 Subject: [PATCH 01/18] fix simd_bitmask docs --- crates/core_simd/src/intrinsics.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index b27893bc729..5260de93354 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -132,9 +132,10 @@ extern "platform-intrinsic" { // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and // returns either an unsigned integer or array of `u8`. // Every element in the vector becomes a single bit in the returned bitmask. - // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits. // The bit order of the result depends on the byte endianness. LSB-first for little // endian and MSB-first for big endian. + // If the vector has less than 8 lanes, the mask lives in the least-significant bits + // (e.g., [true, false] becomes `0b01` on little endian and `0b10` on big endian). // // UB if called on a vector with values other than 0 and -1. #[allow(unused)] From 289c1d14f0dfd80d5e94141a3b9b59bed41c3539 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sun, 3 Dec 2023 11:27:57 -0500 Subject: [PATCH 02/18] Fix bitmask vector bit order --- crates/core_simd/src/masks/full_masks.rs | 6 ++++ crates/core_simd/tests/masks.rs | 42 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 63964f455e0..b184b98a147 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -157,6 +157,9 @@ where for x in bytes.as_mut() { *x = x.reverse_bits() } + if N % 8 > 0 { + bytes.as_mut()[N / 8] >>= 8 - N % 8; + } } bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref()); @@ -180,6 +183,9 @@ where for x in bytes.as_mut() { *x = x.reverse_bits(); } + if N % 8 > 0 { + bytes.as_mut()[N / 8] >>= 8 - N % 8; + } } // Compute the regular mask diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 00fc2a24e27..fc6a3476b7c 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -99,6 +99,19 @@ macro_rules! test_mask_api { assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask); } + #[cfg(feature = "all_lane_counts")] + #[test] + fn roundtrip_bitmask_conversion_odd() { + let values = [ + true, false, true, false, true, true, false, false, false, true, true, + ]; + let mask = Mask::<$type, 11>::from_array(values); + let bitmask = mask.to_bitmask(); + assert_eq!(bitmask, 0b11000110101); + assert_eq!(Mask::<$type, 11>::from_bitmask(bitmask), mask); + } + + #[test] fn cast() { fn cast_impl() @@ -134,6 +147,35 @@ macro_rules! 
test_mask_api { assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]); assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask); } + + // rust-lang/portable-simd#379 + #[test] + fn roundtrip_bitmask_vector_conversion_small() { + use core_simd::simd::ToBytes; + let values = [ + true, false, true, true + ]; + let mask = Mask::<$type, 4>::from_array(values); + let bitmask = mask.to_bitmask_vector(); + assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101); + assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask); + } + + /* FIXME doesn't work with non-powers-of-two, yet + // rust-lang/portable-simd#379 + #[cfg(feature = "all_lane_counts")] + #[test] + fn roundtrip_bitmask_vector_conversion_odd() { + use core_simd::simd::ToBytes; + let values = [ + true, false, true, false, true, true, false, false, false, true, true, + ]; + let mask = Mask::<$type, 11>::from_array(values); + let bitmask = mask.to_bitmask_vector(); + assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]); + assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask); + } + */ } } } From c7057757607ab7a6fe460ac18b3f0c3ae2b4dc68 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 11 Dec 2023 12:17:10 -0800 Subject: [PATCH 03/18] Fix load/store safety comments to require aligned `T` Fixes: #382 --- crates/core_simd/src/vector.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 105c06741c5..881406d0eac 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -194,7 +194,7 @@ where /// With padding, `read_unaligned` will read past the end of an array of N elements. /// /// # Safety - /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`. + /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read`. #[inline] const unsafe fn load(ptr: *const [T; N]) -> Self { // There are potentially simpler ways to write this function, but this should result in @@ -215,7 +215,7 @@ where /// See `load` as to why this function is necessary. /// /// # Safety - /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`. + /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write`. #[inline] const unsafe fn store(self, ptr: *mut [T; N]) { // There are potentially simpler ways to write this function, but this should result in From b6eeb4ee90e31f7846e94e4f639f44bc5f623ca0 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 13 Dec 2023 17:46:46 -0800 Subject: [PATCH 04/18] Assume masks are correct This allows miri to detect when they are not, and may be exploited by LLVM during optimization. 
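Roughly, the idea is to assert the unsafe contract inside the unchecked
constructor itself. A minimal standalone sketch of the pattern, where `Mask8`
and `is_valid` are hypothetical stand-ins rather than portable-simd APIs:

    #![feature(core_intrinsics)]
    #![allow(internal_features)]

    struct Mask8(i8);

    fn is_valid(v: i8) -> bool {
        v == 0 || v == -1
    }

    impl Mask8 {
        /// # Safety
        /// `v` must be all-zeros (0) or all-ones (-1).
        unsafe fn from_int_unchecked(v: i8) -> Self {
            // Miri reports UB here if a caller breaks the contract, and LLVM
            // may optimize later code on the assumption that it holds.
            unsafe { core::intrinsics::assume(is_valid(v)) };
            Mask8(v)
        }
    }

    fn main() {
        let m = unsafe { Mask8::from_int_unchecked(-1) };
        assert_eq!(m.0, -1);
    }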
--- crates/core_simd/src/lib.rs | 1 + crates/core_simd/src/masks.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 64ba9705ef5..e974e7aa25a 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -4,6 +4,7 @@ const_maybe_uninit_as_mut_ptr, const_mut_refs, convert_float_to_int, + core_intrinsics, decl_macro, inline_const, intra_doc_pointers, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 0623d2bf3d1..b95c070d09c 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -175,7 +175,10 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant - unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } + unsafe { + core::intrinsics::assume(::valid(value)); + Self(mask_impl::Mask::from_int_unchecked(value)) + } } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From bb50fa23252a82e6829308a2d771d9eb26226547 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 29 Dec 2023 18:30:53 -0500 Subject: [PATCH 05/18] Use core::intrinsics --- crates/core_simd/src/intrinsics.rs | 170 --------------------- crates/core_simd/src/lib.rs | 2 +- crates/core_simd/src/masks.rs | 19 +-- crates/core_simd/src/masks/bitmask.rs | 9 +- crates/core_simd/src/masks/full_masks.rs | 21 ++- crates/core_simd/src/mod.rs | 4 - crates/core_simd/src/ops.rs | 6 +- crates/core_simd/src/ops/unary.rs | 3 +- crates/core_simd/src/select.rs | 3 +- crates/core_simd/src/simd/cmp/eq.rs | 9 +- crates/core_simd/src/simd/cmp/ord.rs | 25 ++- crates/core_simd/src/simd/num/float.rs | 20 +-- crates/core_simd/src/simd/num/int.rs | 26 ++-- crates/core_simd/src/simd/num/uint.rs | 30 ++-- crates/core_simd/src/simd/ptr/const_ptr.rs | 14 +- crates/core_simd/src/simd/ptr/mut_ptr.rs | 14 +- crates/core_simd/src/swizzle.rs | 5 +- crates/core_simd/src/vector.rs | 11 +- 18 files changed, 106 insertions(+), 285 deletions(-) delete mode 100644 crates/core_simd/src/intrinsics.rs diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs deleted file mode 100644 index 5260de93354..00000000000 --- a/crates/core_simd/src/intrinsics.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! This module contains the LLVM intrinsics bindings that provide the functionality for this -//! crate. -//! -//! The LLVM assembly language is documented here: -//! -//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef: -//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT -//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons, -//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either. -//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note: -//! this means that division by poison or undef is like division by zero, which means it inflicts... -//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program: -//! 
LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end". -//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior, -//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract. -//! -//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory. -//! -//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths. - -// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are -// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner. -// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function. -extern "platform-intrinsic" { - /// add/fadd - pub(crate) fn simd_add(x: T, y: T) -> T; - - /// sub/fsub - pub(crate) fn simd_sub(lhs: T, rhs: T) -> T; - - /// mul/fmul - pub(crate) fn simd_mul(x: T, y: T) -> T; - - /// udiv/sdiv/fdiv - /// ints and uints: {s,u}div incur UB if division by zero occurs. - /// ints: sdiv is UB for int::MIN / -1. - /// floats: fdiv is never UB, but may create NaNs or infinities. - pub(crate) fn simd_div(lhs: T, rhs: T) -> T; - - /// urem/srem/frem - /// ints and uints: {s,u}rem incur UB if division by zero occurs. - /// ints: srem is UB for int::MIN / -1. - /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno. - pub(crate) fn simd_rem(lhs: T, rhs: T) -> T; - - /// shl - /// for (u)ints. poison if rhs >= lhs::BITS - pub(crate) fn simd_shl(lhs: T, rhs: T) -> T; - - /// ints: ashr - /// uints: lshr - /// poison if rhs >= lhs::BITS - pub(crate) fn simd_shr(lhs: T, rhs: T) -> T; - - /// and - pub(crate) fn simd_and(x: T, y: T) -> T; - - /// or - pub(crate) fn simd_or(x: T, y: T) -> T; - - /// xor - pub(crate) fn simd_xor(x: T, y: T) -> T; - - /// fptoui/fptosi/uitofp/sitofp - /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5 - /// but the truncated value must fit in the target type or the result is poison. - /// use `simd_as` instead for a cast that performs a saturating conversion. - pub(crate) fn simd_cast(x: T) -> U; - /// follows Rust's `T as U` semantics, including saturating float casts - /// which amounts to the same as `simd_cast` for many cases - pub(crate) fn simd_as(x: T) -> U; - - /// neg/fneg - /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`. - /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it. - /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`. 
- pub(crate) fn simd_neg(x: T) -> T; - - /// fabs - pub(crate) fn simd_fabs(x: T) -> T; - - // minnum/maxnum - pub(crate) fn simd_fmin(x: T, y: T) -> T; - pub(crate) fn simd_fmax(x: T, y: T) -> T; - - // these return Simd with the same BITS size as the inputs - pub(crate) fn simd_eq(x: T, y: T) -> U; - pub(crate) fn simd_ne(x: T, y: T) -> U; - pub(crate) fn simd_lt(x: T, y: T) -> U; - pub(crate) fn simd_le(x: T, y: T) -> U; - pub(crate) fn simd_gt(x: T, y: T) -> U; - pub(crate) fn simd_ge(x: T, y: T) -> U; - - // shufflevector - // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s - pub(crate) fn simd_shuffle(x: T, y: T, idx: U) -> V; - - /// llvm.masked.gather - /// like a loop of pointer reads - /// val: vector of values to select if a lane is masked - /// ptr: vector of pointers to read from - /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val) - /// note, the LLVM intrinsic accepts a mask vector of `` - /// FIXME: review this if/when we fix up our mask story in general? - pub(crate) fn simd_gather(val: T, ptr: U, mask: V) -> T; - /// llvm.masked.scatter - /// like gather, but more spicy, as it writes instead of reads - pub(crate) fn simd_scatter(val: T, ptr: U, mask: V); - - // {s,u}add.sat - pub(crate) fn simd_saturating_add(x: T, y: T) -> T; - - // {s,u}sub.sat - pub(crate) fn simd_saturating_sub(lhs: T, rhs: T) -> T; - - // reductions - // llvm.vector.reduce.{add,fadd} - pub(crate) fn simd_reduce_add_ordered(x: T, y: U) -> U; - // llvm.vector.reduce.{mul,fmul} - pub(crate) fn simd_reduce_mul_ordered(x: T, y: U) -> U; - #[allow(unused)] - pub(crate) fn simd_reduce_all(x: T) -> bool; - #[allow(unused)] - pub(crate) fn simd_reduce_any(x: T) -> bool; - pub(crate) fn simd_reduce_max(x: T) -> U; - pub(crate) fn simd_reduce_min(x: T) -> U; - pub(crate) fn simd_reduce_and(x: T) -> U; - pub(crate) fn simd_reduce_or(x: T) -> U; - pub(crate) fn simd_reduce_xor(x: T) -> U; - - // truncate integer vector to bitmask - // `fn simd_bitmask(vector) -> unsigned integer` takes a vector of integers and - // returns either an unsigned integer or array of `u8`. - // Every element in the vector becomes a single bit in the returned bitmask. - // The bit order of the result depends on the byte endianness. LSB-first for little - // endian and MSB-first for big endian. - // If the vector has less than 8 lanes, the mask lives in the least-significant bits - // (e.g., [true, false] becomes `0b01` on little endian and `0b10` on big endian). - // - // UB if called on a vector with values other than 0 and -1. - #[allow(unused)] - pub(crate) fn simd_bitmask(x: T) -> U; - - // select - // first argument is a vector of integers, -1 (all bits 1) is "true" - // logically equivalent to (yes & m) | (no & (m^-1), - // but you can use it on floats. 
- pub(crate) fn simd_select(m: M, yes: T, no: T) -> T; - #[allow(unused)] - pub(crate) fn simd_select_bitmask(m: M, yes: T, no: T) -> T; - - /// getelementptr (without inbounds) - /// equivalent to wrapping_offset - pub(crate) fn simd_arith_offset(ptr: T, offset: U) -> T; - - /// equivalent to `T as U` semantics, specifically for pointers - pub(crate) fn simd_cast_ptr(ptr: T) -> U; - - /// expose a pointer as an address - pub(crate) fn simd_expose_addr(ptr: T) -> U; - - /// convert an exposed address back to a pointer - pub(crate) fn simd_from_exposed_addr(addr: T) -> U; - - // Integer operations - pub(crate) fn simd_bswap(x: T) -> T; - pub(crate) fn simd_bitreverse(x: T) -> T; - pub(crate) fn simd_ctlz(x: T) -> T; - pub(crate) fn simd_cttz(x: T) -> T; -} diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 64ba9705ef5..faec64c6344 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,5 +1,6 @@ #![no_std] #![feature( + core_intrinsics, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, const_mut_refs, @@ -7,7 +8,6 @@ decl_macro, inline_const, intra_doc_pointers, - platform_intrinsics, repr_simd, simd_ffi, staged_api, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 0623d2bf3d1..32fd9acbaea 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,9 +12,7 @@ )] mod mask_impl; -use crate::simd::{ - cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, -}; +use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; use core::cmp::Ordering; use core::{fmt, mem}; @@ -141,8 +139,9 @@ where // but these are "dependently-sized" types, so copy elision it is! unsafe { let bytes: [u8; N] = mem::transmute_copy(&array); - let bools: Simd = intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); - Mask::from_int_unchecked(intrinsics::simd_cast(bools)) + let bools: Simd = + core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); + Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools)) } } @@ -160,7 +159,7 @@ where // This would be hypothetically valid as an "in-place" transmute, // but these are "dependently-sized" types, so copy elision it is! 
unsafe { - let mut bytes: Simd = intrinsics::simd_cast(self.to_int()); + let mut bytes: Simd = core::intrinsics::simd::simd_cast(self.to_int()); bytes &= Simd::splat(1i8); mem::transmute_copy(&bytes) } @@ -374,15 +373,17 @@ where ); // Safety: the input and output are integer vectors - let index: Simd = unsafe { intrinsics::simd_cast(index) }; + let index: Simd = unsafe { core::intrinsics::simd::simd_cast(index) }; let masked_index = self.select(index, Self::splat(true).to_int()); // Safety: the input and output are integer vectors - let masked_index: Simd = unsafe { intrinsics::simd_cast(masked_index) }; + let masked_index: Simd = + unsafe { core::intrinsics::simd::simd_cast(masked_index) }; // Safety: the input is an integer vector - let min_index: T::Unsigned = unsafe { intrinsics::simd_reduce_min(masked_index) }; + let min_index: T::Unsigned = + unsafe { core::intrinsics::simd::simd_reduce_min(masked_index) }; // Safety: the return value is the unsigned version of T let min_index: T = unsafe { core::mem::transmute_copy(&min_index) }; diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs index 6ddff07fea2..96c553426ee 100644 --- a/crates/core_simd/src/masks/bitmask.rs +++ b/crates/core_simd/src/masks/bitmask.rs @@ -1,6 +1,5 @@ #![allow(unused_imports)] use super::MaskElement; -use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SupportedLaneCount}; use core::marker::PhantomData; @@ -109,14 +108,18 @@ where #[must_use = "method returns a new vector and does not mutate the original value"] pub fn to_int(self) -> Simd { unsafe { - intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE)) + core::intrinsics::simd::simd_select_bitmask( + self.0, + Simd::splat(T::TRUE), + Simd::splat(T::FALSE), + ) } } #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { - unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) } + unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) } } #[inline] diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs index 63964f455e0..333e449e438 100644 --- a/crates/core_simd/src/masks/full_masks.rs +++ b/crates/core_simd/src/masks/full_masks.rs @@ -1,6 +1,5 @@ //! Masks that take up full SIMD vector registers. -use crate::simd::intrinsics; use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount}; #[repr(transparent)] @@ -138,7 +137,7 @@ where U: MaskElement, { // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type. 
- unsafe { Mask(intrinsics::simd_cast(self.0)) } + unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) } } #[inline] @@ -150,7 +149,7 @@ where unsafe { // Compute the bitmask let mut bytes: as SupportedLaneCount>::BitMask = - intrinsics::simd_bitmask(self.0); + core::intrinsics::simd::simd_bitmask(self.0); // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { @@ -183,7 +182,7 @@ where } // Compute the regular mask - Self::from_int_unchecked(intrinsics::simd_select_bitmask( + Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask( bytes, Self::splat(true).to_int(), Self::splat(false).to_int(), @@ -199,7 +198,7 @@ where let resized = self.to_int().resize::(T::FALSE); // Safety: `resized` is an integer vector with length M, which must match T - let bitmask: U = unsafe { intrinsics::simd_bitmask(resized) }; + let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) }; // LLVM assumes bit order should match endianness if cfg!(target_endian = "big") { @@ -223,7 +222,7 @@ where // SAFETY: `mask` is the correct bitmask type for a u64 bitmask let mask: Simd = unsafe { - intrinsics::simd_select_bitmask( + core::intrinsics::simd::simd_select_bitmask( bitmask, Simd::::splat(T::TRUE), Simd::::splat(T::FALSE), @@ -274,14 +273,14 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { // Safety: use `self` as an integer vector - unsafe { intrinsics::simd_reduce_any(self.to_int()) } + unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) } } #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] pub fn all(self) -> bool { // Safety: use `self` as an integer vector - unsafe { intrinsics::simd_reduce_all(self.to_int()) } + unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) } } } @@ -306,7 +305,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] fn bitand(self, rhs: Self) -> Self { // Safety: `self` is an integer vector - unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) } + unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) } } } @@ -320,7 +319,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] fn bitor(self, rhs: Self) -> Self { // Safety: `self` is an integer vector - unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) } + unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) } } } @@ -334,7 +333,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] fn bitxor(self, rhs: Self) -> Self { // Safety: `self` is an integer vector - unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) } + unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) } } } diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index fd016f1c6f7..45b1a0f9751 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -1,8 +1,6 @@ #[macro_use] mod swizzle; -pub(crate) mod intrinsics; - mod alias; mod cast; mod fmt; @@ -27,8 +25,6 @@ pub mod simd { pub mod cmp; - pub(crate) use crate::core_simd::intrinsics; - pub use crate::core_simd::alias::*; pub use crate::core_simd::cast::*; pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index 8a1b083f039..d8e10eeaa1a 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -37,7 +37,7 @@ where macro_rules! 
unsafe_base { ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => { // Safety: $lhs and $rhs are vectors - unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } + unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) } }; } @@ -55,7 +55,7 @@ macro_rules! wrap_bitshift { #[allow(clippy::suspicious_arithmetic_impl)] // Safety: $lhs and the bitand result are vectors unsafe { - $crate::simd::intrinsics::$simd_call( + core::intrinsics::simd::$simd_call( $lhs, $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)), ) @@ -97,7 +97,7 @@ macro_rules! int_divrem_guard { $rhs }; // Safety: $lhs and rhs are vectors - unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) } + unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) } } }; } diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index a651aa73e95..bdae96332a3 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -1,4 +1,3 @@ -use crate::simd::intrinsics; use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; use core::ops::{Neg, Not}; // unary ops @@ -15,7 +14,7 @@ macro_rules! neg { #[must_use = "operator returns a new vector without mutating the input"] fn neg(self) -> Self::Output { // Safety: `self` is a signed vector - unsafe { intrinsics::simd_neg(self) } + unsafe { core::intrinsics::simd::simd_neg(self) } } })* } diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index cdcf8eeec81..f33aa261a92 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -1,4 +1,3 @@ -use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; impl Mask @@ -29,7 +28,7 @@ where { // Safety: The mask has been cast to a vector of integers, // and the operands to select between are vectors of the same type and length. - unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) } + unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) } } /// Choose elements from two masks. diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index f132fa2cc0c..5b4615ce51d 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -1,5 +1,4 @@ use crate::simd::{ - intrinsics, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, }; @@ -31,14 +30,14 @@ macro_rules! impl_number { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) } } } )* @@ -60,14 +59,14 @@ macro_rules! impl_mask { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. 
- unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) } } } )* diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index 4e9d49ea221..899f00a8316 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -1,6 +1,5 @@ use crate::simd::{ cmp::SimdPartialEq, - intrinsics, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, Simd, SupportedLaneCount, }; @@ -57,28 +56,28 @@ macro_rules! impl_integer { fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } @@ -123,28 +122,28 @@ macro_rules! impl_float { fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) } + unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } )* @@ -164,28 +163,28 @@ macro_rules! 
impl_mask { fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) } + unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) } } } diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index fc0b99e87a6..59e43851ea8 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -1,7 +1,7 @@ use super::sealed::Sealed; use crate::simd::{ cmp::{SimdPartialEq, SimdPartialOrd}, - intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, + LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; /// Operations on SIMD vectors of floats. @@ -259,7 +259,7 @@ macro_rules! impl_trait { fn cast(self) -> Self::Cast { // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } + unsafe { core::intrinsics::simd::simd_as(self) } } #[inline] @@ -269,7 +269,7 @@ macro_rules! impl_trait { Self::Scalar: core::convert::FloatToInt, { // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants - unsafe { intrinsics::simd_cast(self) } + unsafe { core::intrinsics::simd::simd_cast(self) } } #[inline] @@ -289,7 +289,7 @@ macro_rules! impl_trait { #[inline] fn abs(self) -> Self { // Safety: `self` is a float vector - unsafe { intrinsics::simd_fabs(self) } + unsafe { core::intrinsics::simd::simd_fabs(self) } } #[inline] @@ -363,13 +363,13 @@ macro_rules! impl_trait { #[inline] fn simd_min(self, other: Self) -> Self { // Safety: `self` and `other` are float vectors - unsafe { intrinsics::simd_fmin(self, other) } + unsafe { core::intrinsics::simd::simd_fmin(self, other) } } #[inline] fn simd_max(self, other: Self) -> Self { // Safety: `self` and `other` are floating point vectors - unsafe { intrinsics::simd_fmax(self, other) } + unsafe { core::intrinsics::simd::simd_fmax(self, other) } } #[inline] @@ -391,7 +391,7 @@ macro_rules! impl_trait { self.as_array().iter().sum() } else { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) } } } @@ -402,20 +402,20 @@ macro_rules! 
impl_trait { self.as_array().iter().product() } else { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) } + unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1.) } } } #[inline] fn reduce_max(self) -> Self::Scalar { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_max(self) } + unsafe { core::intrinsics::simd::simd_reduce_max(self) } } #[inline] fn reduce_min(self) -> Self::Scalar { // Safety: `self` is a float vector - unsafe { intrinsics::simd_reduce_min(self) } + unsafe { core::intrinsics::simd::simd_reduce_min(self) } } } )* diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs index 1f1aa272782..d7598d9ceaf 100644 --- a/crates/core_simd/src/simd/num/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - cmp::SimdPartialOrd, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, + cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; @@ -237,19 +237,19 @@ macro_rules! impl_trait { #[inline] fn cast(self) -> Self::Cast { // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } + unsafe { core::intrinsics::simd::simd_as(self) } } #[inline] fn saturating_add(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_add(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_add(self, second) } } #[inline] fn saturating_sub(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_sub(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) } } #[inline] @@ -293,55 +293,55 @@ macro_rules! 
impl_trait { #[inline] fn reduce_sum(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) } } #[inline] fn reduce_product(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1) } } #[inline] fn reduce_max(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_max(self) } + unsafe { core::intrinsics::simd::simd_reduce_max(self) } } #[inline] fn reduce_min(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_min(self) } + unsafe { core::intrinsics::simd::simd_reduce_min(self) } } #[inline] fn reduce_and(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_and(self) } + unsafe { core::intrinsics::simd::simd_reduce_and(self) } } #[inline] fn reduce_or(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_or(self) } + unsafe { core::intrinsics::simd::simd_reduce_or(self) } } #[inline] fn reduce_xor(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_xor(self) } + unsafe { core::intrinsics::simd::simd_reduce_xor(self) } } #[inline] fn swap_bytes(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bswap(self) } + unsafe { core::intrinsics::simd::simd_bswap(self) } } #[inline] fn reverse_bits(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bitreverse(self) } + unsafe { core::intrinsics::simd::simd_bitreverse(self) } } #[inline] diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs index c955ee8fe8b..53dd97f501c 100644 --- a/crates/core_simd/src/simd/num/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; /// Operations on SIMD vectors of unsigned integers. pub trait SimdUint: Copy + Sealed { @@ -117,7 +117,7 @@ macro_rules! impl_trait { #[inline] fn cast(self) -> Self::Cast { // Safety: supported types are guaranteed by SimdCast - unsafe { intrinsics::simd_as(self) } + unsafe { core::intrinsics::simd::simd_as(self) } } #[inline] @@ -129,79 +129,79 @@ macro_rules! 
impl_trait { #[inline] fn saturating_add(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_add(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_add(self, second) } } #[inline] fn saturating_sub(self, second: Self) -> Self { // Safety: `self` is a vector - unsafe { intrinsics::simd_saturating_sub(self, second) } + unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) } } #[inline] fn reduce_sum(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_add_ordered(self, 0) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) } } #[inline] fn reduce_product(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) } + unsafe { core::intrinsics::simd::simd_reduce_mul_ordered(self, 1) } } #[inline] fn reduce_max(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_max(self) } + unsafe { core::intrinsics::simd::simd_reduce_max(self) } } #[inline] fn reduce_min(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_min(self) } + unsafe { core::intrinsics::simd::simd_reduce_min(self) } } #[inline] fn reduce_and(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_and(self) } + unsafe { core::intrinsics::simd::simd_reduce_and(self) } } #[inline] fn reduce_or(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_or(self) } + unsafe { core::intrinsics::simd::simd_reduce_or(self) } } #[inline] fn reduce_xor(self) -> Self::Scalar { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_reduce_xor(self) } + unsafe { core::intrinsics::simd::simd_reduce_xor(self) } } #[inline] fn swap_bytes(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bswap(self) } + unsafe { core::intrinsics::simd::simd_bswap(self) } } #[inline] fn reverse_bits(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_bitreverse(self) } + unsafe { core::intrinsics::simd::simd_bitreverse(self) } } #[inline] fn leading_zeros(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_ctlz(self) } + unsafe { core::intrinsics::simd::simd_ctlz(self) } } #[inline] fn trailing_zeros(self) -> Self { // Safety: `self` is an integer vector - unsafe { intrinsics::simd_cttz(self) } + unsafe { core::intrinsics::simd::simd_cttz(self) } } #[inline] diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index 97fe3fb600d..e217d1c8c87 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -1,7 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{ - cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount, -}; +use crate::simd::{cmp::SimdPartialEq, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount}; /// Operations on SIMD vectors of constant pointers. 
pub trait SimdConstPtr: Copy + Sealed { @@ -103,13 +101,13 @@ where assert_eq!(size_of::<::Metadata>(), 0); // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] fn cast_mut(self) -> Self::MutPtr { // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] @@ -135,19 +133,19 @@ where #[inline] fn expose_addr(self) -> Self::Usize { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_expose_addr(self) } + unsafe { core::intrinsics::simd::simd_expose_addr(self) } } #[inline] fn from_exposed_addr(addr: Self::Usize) -> Self { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_from_exposed_addr(addr) } + unsafe { core::intrinsics::simd::simd_from_exposed_addr(addr) } } #[inline] fn wrapping_offset(self, count: Self::Isize) -> Self { // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets - unsafe { intrinsics::simd_arith_offset(self, count) } + unsafe { core::intrinsics::simd::simd_arith_offset(self, count) } } #[inline] diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index e35633d0433..5cb27af4fde 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -1,7 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{ - cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount, -}; +use crate::simd::{cmp::SimdPartialEq, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { @@ -100,13 +98,13 @@ where assert_eq!(size_of::<::Metadata>(), 0); // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] fn cast_const(self) -> Self::ConstPtr { // Safety: pointers can be cast - unsafe { intrinsics::simd_cast_ptr(self) } + unsafe { core::intrinsics::simd::simd_cast_ptr(self) } } #[inline] @@ -132,19 +130,19 @@ where #[inline] fn expose_addr(self) -> Self::Usize { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_expose_addr(self) } + unsafe { core::intrinsics::simd::simd_expose_addr(self) } } #[inline] fn from_exposed_addr(addr: Self::Usize) -> Self { // Safety: `self` is a pointer vector - unsafe { intrinsics::simd_from_exposed_addr(addr) } + unsafe { core::intrinsics::simd::simd_from_exposed_addr(addr) } } #[inline] fn wrapping_offset(self, count: Self::Isize) -> Self { // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets - unsafe { intrinsics::simd_arith_offset(self, count) } + unsafe { core::intrinsics::simd::simd_arith_offset(self, count) } } #[inline] diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index ec8548d5574..71110bb2820 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -1,4 +1,3 @@ -use crate::simd::intrinsics; use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; /// Constructs a new SIMD vector by copying elements from selected elements in other vectors. @@ -88,7 +87,7 @@ pub trait Swizzle { { // Safety: `vector` is a vector, and the index is a const array of u32. 
unsafe { - intrinsics::simd_shuffle( + core::intrinsics::simd::simd_shuffle( vector, vector, const { @@ -124,7 +123,7 @@ pub trait Swizzle { { // Safety: `first` and `second` are vectors, and the index is a const array of u32. unsafe { - intrinsics::simd_shuffle( + core::intrinsics::simd::simd_shuffle( first, second, const { diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 881406d0eac..9e97a3161bb 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,6 +1,5 @@ use crate::simd::{ cmp::SimdPartialOrd, - intrinsics, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; @@ -491,7 +490,7 @@ where or: Self, ) -> Self { // Safety: The caller is responsible for upholding all invariants - unsafe { intrinsics::simd_gather(or, source, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } } /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. @@ -650,7 +649,7 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask) { // Safety: The caller is responsible for upholding all invariants - unsafe { intrinsics::simd_scatter(self, dest, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) } } } @@ -692,7 +691,8 @@ where fn eq(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. let mask = unsafe { - let tfvec: Simd<::Mask, N> = intrinsics::simd_eq(*self, *other); + let tfvec: Simd<::Mask, N> = + core::intrinsics::simd::simd_eq(*self, *other); Mask::from_int_unchecked(tfvec) }; @@ -705,7 +705,8 @@ where fn ne(&self, other: &Self) -> bool { // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask. 
let mask = unsafe { - let tfvec: Simd<::Mask, N> = intrinsics::simd_ne(*self, *other); + let tfvec: Simd<::Mask, N> = + core::intrinsics::simd::simd_ne(*self, *other); Mask::from_int_unchecked(tfvec) }; From e7130ec093a761ee61e22a784ffac32aa1c49d65 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Fri, 5 Jan 2024 17:59:47 -0500 Subject: [PATCH 06/18] Add exposed_provenance for rust-lang/rust#118487 --- crates/core_simd/tests/pointers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs index a90ff928ced..b9f32d16e01 100644 --- a/crates/core_simd/tests/pointers.rs +++ b/crates/core_simd/tests/pointers.rs @@ -1,4 +1,4 @@ -#![feature(portable_simd, strict_provenance)] +#![feature(portable_simd, strict_provenance, exposed_provenance)] use core_simd::simd::{ ptr::{SimdConstPtr, SimdMutPtr}, From 4e36929e1728a62d47b32946c0d236d9395f9dba Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 9 Feb 2024 21:56:19 +0100 Subject: [PATCH 07/18] re-add const_intrinsic_copy feature --- crates/core_simd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index faec64c6344..9180e5c7dfd 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,7 @@ #![no_std] #![feature( core_intrinsics, + const_intrinsic_copy, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, const_mut_refs, From 047ba0a280a5d23f52c717af053b3171b0f78de8 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 9 Feb 2024 22:04:03 +0100 Subject: [PATCH 08/18] stdsimd feature got split up --- crates/core_simd/src/lib.rs | 19 ++++++++++++++++++- crates/test_helpers/src/lib.rs | 6 +++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 9180e5c7dfd..dfb36d8c513 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -12,10 +12,27 @@ repr_simd, simd_ffi, staged_api, - stdsimd, strict_provenance, ptr_metadata )] +#![cfg_attr( + all( + any(target_arch = "aarch64", target_arch = "arm",), + any( + all(target_feature = "v6", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), + ) + ), + feature(stdarch_arm_dsp) +)] +#![cfg_attr( + all(target_arch = "arm", target_feature = "v7"), + feature(stdarch_arm_neon_intrinsics) +)] +#![cfg_attr( + any(target_arch = "powerpc", target_arch = "powerpc64"), + feature(stdarch_powerpc) +)] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)] #![allow(internal_features)] diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index b80c745aaf2..51b860a8635 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -1,4 +1,8 @@ -#![feature(stdsimd, powerpc_target_feature)] +#![feature(powerpc_target_feature)] +#![cfg_attr( + any(target_arch = "powerpc", target_arch = "powerpc64"), + feature(stdarch_powerpc) +)] pub mod array; From 851ef63576dc4d598e5690afe1ee4e066cda306a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 14 Feb 2024 16:26:03 +0100 Subject: [PATCH 09/18] use core::intrinsics::simd --- crates/std_float/src/lib.rs | 38 +++++++++---------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 1fef17242ca..4c547777fde 100644 --- 
a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(feature = "as_crate", no_std)] // We are std! #![cfg_attr( feature = "as_crate", - feature(platform_intrinsics), + feature(core_intrinsics), feature(portable_simd), allow(internal_features) )] @@ -10,6 +10,8 @@ use core::simd; #[cfg(feature = "as_crate")] use core_simd::simd; +use core::intrinsics::simd as intrinsics; + use simd::{LaneCount, Simd, SupportedLaneCount}; #[cfg(feature = "as_crate")] @@ -22,28 +24,6 @@ use experimental as sealed; use crate::sealed::Sealed; -// "platform intrinsics" are essentially "codegen intrinsics" -// each of these may be scalarized and lowered to a libm call -extern "platform-intrinsic" { - // ceil - fn simd_ceil(x: T) -> T; - - // floor - fn simd_floor(x: T) -> T; - - // round - fn simd_round(x: T) -> T; - - // trunc - fn simd_trunc(x: T) -> T; - - // fsqrt - fn simd_fsqrt(x: T) -> T; - - // fma - fn simd_fma(x: T, y: T, z: T) -> T; -} - /// This trait provides a possibly-temporary implementation of float functions /// that may, in the absence of hardware support, canonicalize to calling an /// operating system's `math.h` dynamically-loaded library (also known as a @@ -74,7 +54,7 @@ pub trait StdFloat: Sealed + Sized { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn mul_add(self, a: Self, b: Self) -> Self { - unsafe { simd_fma(self, a, b) } + unsafe { intrinsics::simd_fma(self, a, b) } } /// Produces a vector where every lane has the square root value @@ -82,35 +62,35 @@ pub trait StdFloat: Sealed + Sized { #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] fn sqrt(self) -> Self { - unsafe { simd_fsqrt(self) } + unsafe { intrinsics::simd_fsqrt(self) } } /// Returns the smallest integer greater than or equal to each lane. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn ceil(self) -> Self { - unsafe { simd_ceil(self) } + unsafe { intrinsics::simd_ceil(self) } } /// Returns the largest integer value less than or equal to each lane. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn floor(self) -> Self { - unsafe { simd_floor(self) } + unsafe { intrinsics::simd_floor(self) } } /// Rounds to the nearest integer value. Ties round toward zero. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn round(self) -> Self { - unsafe { simd_round(self) } + unsafe { intrinsics::simd_round(self) } } /// Returns the floating point's integer value, with its fractional part removed. #[must_use = "method returns a new vector and does not mutate the original value"] #[inline] fn trunc(self) -> Self { - unsafe { simd_trunc(self) } + unsafe { intrinsics::simd_trunc(self) } } /// Returns the floating point's fractional value, with its integer part removed. 
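The methods above belong to the nightly `StdFloat` extension trait; a minimal
usage sketch, assuming a nightly toolchain with the `portable_simd` feature
enabled:

    #![feature(portable_simd)]
    use std::simd::{f32x4, StdFloat};

    fn main() {
        let v = f32x4::from_array([1.0, 4.0, 9.0, 16.0]);
        // sqrt and mul_add lower to the simd_fsqrt / simd_fma intrinsics.
        assert_eq!(v.sqrt().to_array(), [1.0, 2.0, 3.0, 4.0]);
        assert_eq!(v.mul_add(v, v).to_array(), [2.0, 20.0, 90.0, 272.0]);
        // trunc drops the fractional part of each lane, like f32::trunc.
        let w = f32x4::from_array([1.2, -1.2, 2.5, -2.5]);
        assert_eq!(w.trunc().to_array(), [1.0, -1.0, 2.0, -2.0]);
    }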
From a2dd4a3b5be7356c7c72387346e4299093964ea6 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 10:49:13 -0500 Subject: [PATCH 10/18] Minor fixes --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/src/masks.rs | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 4cad148f224..a25723e11ce 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,6 +1,5 @@ #![no_std] #![feature( - core_intrinsics, const_intrinsic_copy, const_refs_to_cell, const_maybe_uninit_as_mut_ptr, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 4389079adb3..ad7c68fc7f6 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -33,7 +33,7 @@ mod sealed { fn eq(self, other: Self) -> bool; - fn as_usize(self) -> usize; + fn to_usize(self) -> usize; type Unsigned: SimdElement; @@ -65,7 +65,7 @@ macro_rules! impl_element { fn eq(self, other: Self) -> bool { self == other } #[inline] - fn as_usize(self) -> usize { + fn to_usize(self) -> usize { self as usize } @@ -394,7 +394,7 @@ where if min_index.eq(T::TRUE) { None } else { - Some(min_index.as_usize()) + Some(min_index.to_usize()) } } } From 644bdfb2ac810c788300bfd79fcc0e0ec84cda5f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:08:08 -0500 Subject: [PATCH 11/18] Revert "Merge pull request #385 from workingjubilee/make-an-ass-out-of-u-and-me" This reverts commit 6ad779c3f6f8e935c12f5a9e488aeeaf0d829ff1, reversing changes made to b2e1bcba2c9febb16561420392be2ca483fcad7f. --- crates/core_simd/src/lib.rs | 1 - crates/core_simd/src/masks.rs | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index a25723e11ce..6c66142f0c8 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -5,7 +5,6 @@ const_maybe_uninit_as_mut_ptr, const_mut_refs, convert_float_to_int, - core_intrinsics, decl_macro, inline_const, intra_doc_pointers, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ad7c68fc7f6..aad91d7acb7 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -174,10 +174,7 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant - unsafe { - core::intrinsics::assume(::valid(value)); - Self(mask_impl::Mask::from_int_unchecked(value)) - } + unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From d8439e90bd6c86fe4368807f5d1fc9952a36aeca Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:11:58 -0500 Subject: [PATCH 12/18] Add back core_intrinsics --- crates/core_simd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 6c66142f0c8..a25723e11ce 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -5,6 +5,7 @@ const_maybe_uninit_as_mut_ptr, const_mut_refs, convert_float_to_int, + core_intrinsics, decl_macro, inline_const, intra_doc_pointers, From 44b4d26273e6d18dcd6bbadd66e793c229e89a59 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:18:31 -0500 Subject: [PATCH 13/18] Revert "Revert "Merge pull request #385 from workingjubilee/make-an-ass-out-of-u-and-me"" This reverts 
commit 644bdfb2ac810c788300bfd79fcc0e0ec84cda5f. --- crates/core_simd/src/masks.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index aad91d7acb7..ad7c68fc7f6 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -174,7 +174,10 @@ where #[must_use = "method returns a new mask and does not mutate the original value"] pub unsafe fn from_int_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant - unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) } + unsafe { + core::intrinsics::assume(::valid(value)); + Self(mask_impl::Mask::from_int_unchecked(value)) + } } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 From aebf6f156056ed803afed2ad055094d2ff4fc0cb Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 11:33:13 -0500 Subject: [PATCH 14/18] Use intrinsics directly to avoid recursion --- crates/core_simd/src/masks.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index ad7c68fc7f6..e480c25a51e 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -12,7 +12,7 @@ )] mod mask_impl; -use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; use core::cmp::Ordering; use core::{fmt, mem}; @@ -58,7 +58,16 @@ macro_rules! impl_element { where LaneCount: SupportedLaneCount, { - (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all() + // We can't use `Simd` directly, because `Simd`'s functions call this function and + // we will end up with an infinite loop. + // Safety: `value` is an integer vector + unsafe { + use core::intrinsics::simd; + let falses: Simd = simd::simd_eq(value, Simd::splat(0 as _)); + let trues: Simd = simd::simd_eq(value, Simd::splat(-1 as _)); + let valid: Simd = simd::simd_or(falses, trues); + simd::simd_reduce_all(valid) + } } #[inline] From afa70e6d92b90b54f1c31ca9838fdeb4c7033db7 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 2 Dec 2023 10:49:21 -0500 Subject: [PATCH 15/18] Remove link to core::arch::x86_64 --- crates/core_simd/src/core_simd_docs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/core_simd_docs.md b/crates/core_simd/src/core_simd_docs.md index fa93155ff5e..bf412e035b5 100644 --- a/crates/core_simd/src/core_simd_docs.md +++ b/crates/core_simd/src/core_simd_docs.md @@ -30,7 +30,7 @@ Instead, they map to a reasonable implementation of the operation for the target Consistency between targets is not compromised to use faster or fewer instructions. In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent. -For example, [`_mm_min_ps`](`core::arch::x86_64::_mm_min_ps`)[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`]. +For example, `_mm_min_ps`[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`]. When necessary, [`Simd`] can be converted to the types provided by `std::arch` to make use of target-specific functions. 
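The behavioral difference mentioned in that doc paragraph is mostly about NaN
handling; a small sketch of the IEEE-style `simd_min` semantics, assuming a
nightly toolchain with the `portable_simd` feature enabled:

    #![feature(portable_simd)]
    use std::simd::{f32x4, num::SimdFloat};

    fn main() {
        let a = f32x4::from_array([f32::NAN, 0.0, 2.0, -1.0]);
        let b = f32x4::from_array([1.0, f32::NAN, 1.0, -2.0]);
        // Like f32::min, simd_min returns the non-NaN operand when exactly one
        // input is NaN; _mm_min_ps effectively computes `a < b ? a : b`, so an
        // unordered lane yields its second operand instead.
        assert_eq!(a.simd_min(b).to_array(), [1.0, 0.0, 1.0, -2.0]);
    }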
Many targets simply don't have SIMD, or don't support SIMD for a particular element type. From 64bd26f221e5f2ce198899635999d043f4dfc241 Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Dec 2023 23:26:45 +0100 Subject: [PATCH 16/18] Fix target_feature config in portable-simd --- crates/core_simd/src/swizzle_dyn.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index bd8a38e350d..dac013cc98d 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -55,7 +55,7 @@ where 16 => transize(vqtbl1q_u8, self, idxs), #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] 32 => transize_raw(avx2_pshufb, self, idxs), - #[cfg(target_feature = "avx512vl,avx512vbmi")] + #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))] 32 => transize(x86::_mm256_permutexvar_epi8, self, idxs), // Notable absence: avx512bw shuffle // If avx512bw is available, odds of avx512vbmi are good From 9aec60e2c63c3c67649589a77a4e53de45cc565b Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 30 Jan 2024 03:40:53 +0000 Subject: [PATCH 17/18] Disable conversions between portable_simd and stdarch on big-endian ARM stdarch no longer provide SIMD on big-endian ARM due to https://github.com/rust-lang/stdarch/issues/1484 --- crates/core_simd/src/vendor/arm.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/core_simd/src/vendor/arm.rs b/crates/core_simd/src/vendor/arm.rs index ff3b69ccf95..ee5c6421373 100644 --- a/crates/core_simd/src/vendor/arm.rs +++ b/crates/core_simd/src/vendor/arm.rs @@ -7,9 +7,12 @@ use core::arch::arm::*; #[cfg(target_arch = "aarch64")] use core::arch::aarch64::*; -#[cfg(any( - target_arch = "aarch64", - all(target_arch = "arm", target_feature = "v7"), +#[cfg(all( + any( + target_arch = "aarch64", + all(target_arch = "arm", target_feature = "v7"), + ), + target_endian = "little" ))] mod neon { use super::*; From eee4f1d83a29345dcd542a53d729c04bc53e656d Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 17 Feb 2024 19:10:17 -0500 Subject: [PATCH 18/18] Fix swizzle_dyn --- crates/core_simd/src/swizzle_dyn.rs | 44 ++++++++--------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index dac013cc98d..ae9ff6894b0 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -44,7 +44,7 @@ where ))] 8 => transize(vtbl1_u8, self, idxs), #[cfg(target_feature = "ssse3")] - 16 => transize(x86::_mm_shuffle_epi8, self, idxs), + 16 => transize(x86::_mm_shuffle_epi8, self, zeroing_idxs(idxs)), #[cfg(target_feature = "simd128")] 16 => transize(wasm::i8x16_swizzle, self, idxs), #[cfg(all( @@ -54,9 +54,9 @@ where ))] 16 => transize(vqtbl1q_u8, self, idxs), #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] - 32 => transize_raw(avx2_pshufb, self, idxs), + 32 => transize(avx2_pshufb, self, idxs), #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))] - 32 => transize(x86::_mm256_permutexvar_epi8, self, idxs), + 32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self), // Notable absence: avx512bw shuffle // If avx512bw is available, odds of avx512vbmi are good // FIXME: initial AVX512VBMI variant didn't actually pass muster @@ -129,45 +129,25 @@ unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { #[inline(always)] unsafe fn transize( f: 
unsafe fn(T, T) -> T, - bytes: Simd, - idxs: Simd, + a: Simd, + b: Simd, ) -> Simd where LaneCount: SupportedLaneCount, { - let idxs = zeroing_idxs(idxs); // SAFETY: Same obligation to use this function as to use mem::transmute_copy. - unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) } + unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) } } -/// Make indices that yield 0 for this architecture +/// Make indices that yield 0 for x86 +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[allow(unused)] #[inline(always)] fn zeroing_idxs(idxs: Simd) -> Simd where LaneCount: SupportedLaneCount, { - // On x86, make sure the top bit is set. - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - let idxs = { - use crate::simd::cmp::SimdPartialOrd; - idxs.simd_lt(Simd::splat(N as u8)) - .select(idxs, Simd::splat(u8::MAX)) - }; - // Simply do nothing on most architectures. - idxs -} - -/// As transize but no implicit call to `zeroing_idxs`. -#[allow(dead_code)] -#[inline(always)] -unsafe fn transize_raw( - f: unsafe fn(T, T) -> T, - bytes: Simd, - idxs: Simd, -) -> Simd -where - LaneCount: SupportedLaneCount, -{ - // SAFETY: Same obligation to use this function as to use mem::transmute_copy. - unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) } + use crate::simd::cmp::SimdPartialOrd; + idxs.simd_lt(Simd::splat(N as u8)) + .select(idxs, Simd::splat(u8::MAX)) }
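The thread running through this last patch is the portable contract of `swizzle_dyn`: an in-range index selects the corresponding byte, and any out-of-range index must yield 0 no matter which backend (`pshufb`, `vqtbl1q_u8`, the scalar fallback, ...) services the call, which is why the x86 paths remap indices through `zeroing_idxs` first. A minimal sketch of that contract, assuming only a nightly toolchain with the `portable_simd` feature:

```rust
#![feature(portable_simd)]
// Sketch only: `swizzle_dyn` is provided for `Simd<u8, N>`, and out-of-range
// indices are documented to select 0.
use std::simd::Simd;

fn main() {
    let table = Simd::<u8, 16>::from_array(*b"abcdefghijklmnop");
    // 3, 0, 1, 15 and 2 are in range; 0xFF and 16 are not and must map to 0.
    let idxs = Simd::from_array([3, 0, 0xFF, 1, 15, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0]);
    let shuffled = table.swizzle_dyn(idxs);
    assert_eq!(
        shuffled.to_array()[..8],
        [b'd', b'a', 0, b'b', b'p', 0, b'c', b'c']
    );
    println!("{shuffled:?}");
}
```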
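Several of the earlier patches circle around the validity invariant that `from_int_unchecked` now `assume`s. A small hedged sketch of the two constructors' contracts (again assuming nightly with `portable_simd`): the safe `from_int` checks the all-zeros/all-ones invariant, while the unsafe variant makes the caller responsible for it, which is exactly what the added `assume` lets Miri and LLVM rely on.

```rust
#![feature(portable_simd)]
use std::simd::{Mask, Simd};

fn main() {
    let ints = Simd::from_array([0i32, -1, -1, 0]);

    // Safe constructor: asserts that every lane is 0 or -1 (panics otherwise).
    let mask = Mask::<i32, 4>::from_int(ints);
    assert_eq!(mask.to_array(), [false, true, true, false]);

    // Unsafe constructor: the caller promises the invariant; violating it is
    // undefined behavior, which the `assume` now makes detectable under Miri.
    let mask2 = unsafe { Mask::<i32, 4>::from_int_unchecked(ints) };
    assert_eq!(mask, mask2);
}
```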