diff --git a/Cargo.lock b/Cargo.lock index f9ad78e3795..1963f7c0d56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2524,14 +2524,12 @@ dependencies = [ "aes", "colored", "ctrlc", - "env_logger 0.10.2", "getrandom", "jemalloc-sys", "lazy_static", "libc", "libffi", "libloading", - "log", "measureme", "rand", "regex", diff --git a/src/tools/miri/CONTRIBUTING.md b/src/tools/miri/CONTRIBUTING.md index 7a49ff3372f..f2f3a642e0a 100644 --- a/src/tools/miri/CONTRIBUTING.md +++ b/src/tools/miri/CONTRIBUTING.md @@ -78,6 +78,8 @@ custom target file, you might have to set `MIRI_NO_STD=1`. base directory, e.g. `./miri test fail` will run all compile-fail tests). These filters are passed to `cargo test`, so for multiple filers you need to use `./miri test -- FILTER1 FILTER2`. +#### Fine grained logging + You can get a trace of which MIR statements are being executed by setting the `MIRI_LOG` environment variable. For example: @@ -94,9 +96,16 @@ stacked borrows implementation: MIRI_LOG=rustc_mir::interpret=info,miri::stacked_borrows ./miri run tests/pass/vec.rs ``` -In addition, you can set `MIRI_BACKTRACE=1` to get a backtrace of where an +Note that you will only get `info`, `warn` or `error` messages if you use a prebuilt compiler. +In order to get `debug` and `trace` level messages, you need to build miri with a locally built +compiler that has `debug=true` set in `config.toml`. + +#### Debugging error messages + +You can set `MIRI_BACKTRACE=1` to get a backtrace of where an evaluation error was originally raised. + ### UI testing We use ui-testing in Miri, meaning we generate `.stderr` and `.stdout` files for the output diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index 8cd996d8564..87dc51bd612 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -273,19 +273,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "env_logger" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95b3f3e67048839cb0d0781f445682a35113da7121f7c949db0e2be96a4fbece" -dependencies = [ - "humantime", - "is-terminal", - "log", - "regex", - "termcolor", -] - [[package]] name = "errno" version = "0.3.8" @@ -339,18 +326,6 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" -[[package]] -name = "hermit-abi" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - [[package]] name = "indenter" version = "0.3.3" @@ -388,17 +363,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "is-terminal" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" -dependencies = [ - "hermit-abi", - "rustix", - "windows-sys 0.52.0", -] - [[package]] name = "itoa" version = "1.0.10" @@ -529,14 +493,12 @@ dependencies = [ "aes", "colored", "ctrlc", - "env_logger", "getrandom", "jemalloc-sys", "lazy_static", "libc", "libffi", "libloading", - "log", "measureme", "rand", "regex", @@ -875,15 +837,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "termcolor" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" -dependencies = [ - "winapi-util", -] - [[package]] name = "thiserror" version = "1.0.56" @@ -1034,15 +987,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml index a65010b055b..39122c847ce 100644 --- a/src/tools/miri/Cargo.toml +++ b/src/tools/miri/Cargo.toml @@ -19,8 +19,6 @@ doctest = false # and no doc tests [dependencies] getrandom = { version = "0.2", features = ["std"] } -env_logger = "0.10" -log = "0.4" rand = "0.8" smallvec = "1.7" aes = { version = "0.8.3", features = ["hazmat"] } diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index 6695f123c78..60bf07b1736 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -108,11 +108,8 @@ assume the right toolchain is pinned via `rustup override set nightly` or Now you can run your project in Miri: -1. Run `cargo clean` to eliminate any cached dependencies. Miri needs your - dependencies to be compiled the right way, that would not happen if they have - previously already been compiled. -2. To run all tests in your project through Miri, use `cargo miri test`. -3. If you have a binary project, you can run it through Miri using `cargo miri run`. +- To run all tests in your project through Miri, use `cargo miri test`. +- If you have a binary project, you can run it through Miri using `cargo miri run`. The first time you run Miri, it will perform some extra setup and install some dependencies. It will ask you for confirmation before installing anything. diff --git a/src/tools/miri/ci/ci.sh b/src/tools/miri/ci/ci.sh index 6bcc68ebf7c..9d2c3f362e6 100755 --- a/src/tools/miri/ci/ci.sh +++ b/src/tools/miri/ci/ci.sh @@ -121,8 +121,9 @@ case $HOST_TARGET in MIRI_TEST_TARGET=aarch64-apple-darwin run_tests MIRI_TEST_TARGET=i686-pc-windows-gnu run_tests # Some targets are only partially supported. - MIRI_TEST_TARGET=x86_64-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align - MIRI_TEST_TARGET=i686-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align + MIRI_TEST_TARGET=x86_64-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align num_cpus + MIRI_TEST_TARGET=i686-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple pthread-threadname libc-getentropy libc-getrandom libc-misc libc-fs atomic env align num_cpus + MIRI_TEST_TARGET=aarch64-linux-android run_tests_minimal hello integer vec panic/panic MIRI_TEST_TARGET=wasm32-wasi run_tests_minimal no_std integer strings wasm MIRI_TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std integer strings wasm diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 6624672775f..ab6f899cd3a 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -dd2559e08e1530806740931037d6bb83ef956161 +4316d0c6252cb1f833e582dfa68adb98efd5ddfb diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index 095ba173671..db4c4a28deb 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -5,7 +5,7 @@ clippy::useless_format, clippy::field_reassign_with_default, rustc::diagnostic_outside_of_impl, - rustc::untranslatable_diagnostic, + rustc::untranslatable_diagnostic )] extern crate rustc_data_structures; @@ -16,14 +16,14 @@ extern crate rustc_log; extern crate rustc_metadata; extern crate rustc_middle; extern crate rustc_session; +#[macro_use] +extern crate tracing; use std::env::{self, VarError}; use std::num::NonZero; use std::path::PathBuf; use std::str::FromStr; -use log::debug; - use rustc_data_structures::sync::Lrc; use rustc_driver::Compilation; use rustc_hir::{self as hir, Node}; @@ -200,7 +200,7 @@ fn rustc_logger_config() -> rustc_log::LoggerConfig { // CTFE-related. Otherwise, we use it verbatim for `RUSTC_LOG`. // This way, if you set `MIRI_LOG=trace`, you get only the right parts of // rustc traced, but you can also do `MIRI_LOG=miri=trace,rustc_const_eval::interpret=debug`. - if log::Level::from_str(&var).is_ok() { + if tracing::Level::from_str(&var).is_ok() { cfg.filter = Ok(format!( "rustc_middle::mir::interpret={var},rustc_const_eval::interpret={var}" )); @@ -218,10 +218,6 @@ fn rustc_logger_config() -> rustc_log::LoggerConfig { } fn init_early_loggers(early_dcx: &EarlyDiagCtxt) { - // Note that our `extern crate log` is *not* the same as rustc's; as a result, we have to - // initialize them both, and we always initialize `miri`'s first. - let env = env_logger::Env::new().filter("MIRI_LOG").write_style("MIRI_LOG_STYLE"); - env_logger::init_from_env(env); // Now for rustc. We only initialize `rustc` if the env var is set (so the user asked for it). // If it is not set, we avoid initializing now so that we can initialize later with our custom // settings, and *not* log anything for what happens before `miri` gets started. diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs index 45240edea45..711323b51c2 100644 --- a/src/tools/miri/src/borrow_tracker/mod.rs +++ b/src/tools/miri/src/borrow_tracker/mod.rs @@ -2,7 +2,6 @@ use std::cell::RefCell; use std::fmt; use std::num::NonZero; -use log::trace; use smallvec::SmallVec; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs index 7740d383ee3..0fe422180f7 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs @@ -9,8 +9,6 @@ use std::cmp; use std::fmt::Write; use std::mem; -use log::trace; - use rustc_data_structures::fx::FxHashSet; use rustc_middle::mir::{Mutability, RetagKind}; use rustc_middle::ty::{self, layout::HasParamEnv, Ty}; diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs index 291807c25ee..712c26a9afd 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/stack.rs @@ -385,7 +385,7 @@ impl<'tcx> Stack { let upper = unique_range.end; for item in &mut self.borrows[lower..upper] { if item.perm() == Permission::Unique { - log::trace!("access: disabling item {:?}", item); + trace!("access: disabling item {:?}", item); visitor(*item)?; item.set_permission(Permission::Disabled); // Also update all copies of this item in the cache. diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index 0945a5292bb..cc982865341 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -1,5 +1,3 @@ -use log::trace; - use rustc_target::abi::{Abi, Size}; use crate::borrow_tracker::{AccessKind, GlobalState, GlobalStateInner, ProtectorKind}; diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs index 80d0402fc87..a280448ae05 100644 --- a/src/tools/miri/src/concurrency/data_race.rs +++ b/src/tools/miri/src/concurrency/data_race.rs @@ -466,7 +466,7 @@ impl MemoryCellClocks { index: VectorIdx, access_size: Size, ) -> Result<(), DataRace> { - log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, thread_clocks); + trace!("Atomic read with vectors: {:#?} :: {:#?}", self, thread_clocks); let atomic = self.atomic_access(thread_clocks, access_size)?; atomic.read_vector.set_at_index(&thread_clocks.clock, index); // Make sure the last non-atomic write and all non-atomic reads were before this access. @@ -485,7 +485,7 @@ impl MemoryCellClocks { index: VectorIdx, access_size: Size, ) -> Result<(), DataRace> { - log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, thread_clocks); + trace!("Atomic write with vectors: {:#?} :: {:#?}", self, thread_clocks); let atomic = self.atomic_access(thread_clocks, access_size)?; atomic.write_vector.set_at_index(&thread_clocks.clock, index); // Make sure the last non-atomic write and all non-atomic reads were before this access. @@ -504,7 +504,7 @@ impl MemoryCellClocks { index: VectorIdx, current_span: Span, ) -> Result<(), DataRace> { - log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, thread_clocks); + trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, thread_clocks); if !current_span.is_dummy() { thread_clocks.clock[index].span = current_span; } @@ -533,7 +533,7 @@ impl MemoryCellClocks { write_type: NaWriteType, current_span: Span, ) -> Result<(), DataRace> { - log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, thread_clocks); + trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, thread_clocks); if !current_span.is_dummy() { thread_clocks.clock[index].span = current_span; } @@ -743,7 +743,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> { &this.machine.threads, current_span, |index, mut clocks| { - log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); + trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); // Apply data-race detection for the current fences // this treats AcqRel and SeqCst as the same as an acquire @@ -841,7 +841,7 @@ impl VClockAlloc { // Find an index, if one exists where the value // in `l` is greater than the value in `r`. fn find_gt_index(l: &VClock, r: &VClock) -> Option { - log::trace!("Find index where not {:?} <= {:?}", l, r); + trace!("Find index where not {:?} <= {:?}", l, r); let l_slice = l.as_slice(); let r_slice = r.as_slice(); l_slice @@ -1270,7 +1270,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> { // Load and log the atomic operation. // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option. let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); - log::trace!( + trace!( "Atomic op({}) with ordering {:?} on {:?} (size={})", access.description(), &atomic, @@ -1311,11 +1311,11 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> { )?; // Log changes to atomic memory. - if log::log_enabled!(log::Level::Trace) { + if tracing::enabled!(tracing::Level::TRACE) { for (_offset, mem_clocks) in alloc_meta.alloc_ranges.borrow().iter(base_offset, size) { - log::trace!( + trace!( "Updated atomic memory({:?}, size={}) to {:#?}", place.ptr(), size.bytes(), @@ -1530,7 +1530,7 @@ impl GlobalState { vector_info.push(thread) }; - log::trace!("Creating thread = {:?} with vector index = {:?}", thread, created_index); + trace!("Creating thread = {:?} with vector index = {:?}", thread, created_index); // Mark the chosen vector index as in use by the thread. thread_info[thread].vector_index = Some(created_index); diff --git a/src/tools/miri/src/concurrency/sync.rs b/src/tools/miri/src/concurrency/sync.rs index b948ecb8345..956a02ded0f 100644 --- a/src/tools/miri/src/concurrency/sync.rs +++ b/src/tools/miri/src/concurrency/sync.rs @@ -1,8 +1,6 @@ use std::collections::{hash_map::Entry, VecDeque}; use std::ops::Not; -use log::trace; - use rustc_data_structures::fx::FxHashMap; use rustc_index::{Idx, IndexVec}; use rustc_middle::ty::layout::TyAndLayout; @@ -71,7 +69,7 @@ struct Mutex { lock_count: usize, /// The queue of threads waiting for this mutex. queue: VecDeque, - /// Data race handle, this tracks the happens-before + /// Data race handle. This tracks the happens-before /// relationship between each mutex access. It is /// released to during unlock and acquired from during /// locking, and therefore stores the clock of the last @@ -93,7 +91,7 @@ struct RwLock { writer_queue: VecDeque, /// The queue of reader threads waiting for this lock. reader_queue: VecDeque, - /// Data race handle for writers, tracks the happens-before + /// Data race handle for writers. Tracks the happens-before /// ordering between each write access to a rwlock and is updated /// after a sequence of concurrent readers to track the happens- /// before ordering between the set of previous readers and @@ -102,7 +100,7 @@ struct RwLock { /// lock or the joined clock of the set of last threads to release /// shared reader locks. data_race: VClock, - /// Data race handle for readers, this is temporary storage + /// Data race handle for readers. This is temporary storage /// for the combined happens-before ordering for between all /// concurrent readers and the next writer, and the value /// is stored to the main data_race variable once all @@ -111,6 +109,7 @@ struct RwLock { /// must load the clock of the last write and must not /// add happens-before orderings between shared reader /// locks. + /// This is only relevant when there is an active reader. data_race_reader: VClock, } @@ -486,6 +485,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { Entry::Vacant(_) => return false, // we did not even own this lock } if let Some(data_race) = &this.machine.data_race { + // Add this to the shared-release clock of all concurrent readers. data_race.validate_lock_release_shared( &mut rwlock.data_race_reader, reader, @@ -540,20 +540,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } rwlock.writer = None; trace!("rwlock_writer_unlock: {:?} unlocked by {:?}", id, expected_writer); - // Release memory to both reader and writer vector clocks - // since this writer happens-before both the union of readers once they are finished - // and the next writer + // Release memory to next lock holder. if let Some(data_race) = &this.machine.data_race { data_race.validate_lock_release( &mut rwlock.data_race, current_writer, current_span, ); - data_race.validate_lock_release( - &mut rwlock.data_race_reader, - current_writer, - current_span, - ); } // The thread was a writer. // diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs index 754cfa4d2a8..64e1f3c5b55 100644 --- a/src/tools/miri/src/concurrency/thread.rs +++ b/src/tools/miri/src/concurrency/thread.rs @@ -8,7 +8,6 @@ use std::task::Poll; use std::time::{Duration, SystemTime}; use either::Either; -use log::trace; use rustc_data_structures::fx::FxHashMap; use rustc_hir::def_id::DefId; diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs index 19b29a41819..d47f446716b 100644 --- a/src/tools/miri/src/diagnostics.rs +++ b/src/tools/miri/src/diagnostics.rs @@ -1,8 +1,6 @@ use std::fmt::{self, Write}; use std::num::NonZero; -use log::trace; - use rustc_errors::{DiagnosticBuilder, DiagnosticMessage, Level}; use rustc_span::{SpanData, Symbol, DUMMY_SP}; use rustc_target::abi::{Align, Size}; @@ -102,10 +100,7 @@ impl MachineStopType for TerminationInfo { } fn add_args( self: Box, - _: &mut dyn FnMut( - std::borrow::Cow<'static, str>, - rustc_errors::DiagnosticArgValue, - ), + _: &mut dyn FnMut(std::borrow::Cow<'static, str>, rustc_errors::DiagnosticArgValue), ) { } } @@ -290,7 +285,10 @@ pub fn report_error<'tcx, 'mir>( ) => { ecx.handle_ice(); // print interpreter backtrace - bug!("This validation error should be impossible in Miri: {}", format_interp_error(ecx.tcx.dcx(), e)); + bug!( + "This validation error should be impossible in Miri: {}", + format_interp_error(ecx.tcx.dcx(), e) + ); } UndefinedBehavior(_) => "Undefined Behavior", ResourceExhaustion(_) => "resource exhaustion", @@ -304,7 +302,10 @@ pub fn report_error<'tcx, 'mir>( ) => "post-monomorphization error", _ => { ecx.handle_ice(); // print interpreter backtrace - bug!("This error should be impossible in Miri: {}", format_interp_error(ecx.tcx.dcx(), e)); + bug!( + "This error should be impossible in Miri: {}", + format_interp_error(ecx.tcx.dcx(), e) + ); } }; #[rustfmt::skip] diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index 6095b8842eb..9bab9488e37 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -7,9 +7,6 @@ use std::path::PathBuf; use std::task::Poll; use std::thread; -use log::info; -use rustc_middle::ty::Ty; - use crate::concurrency::thread::TlsAllocAction; use crate::diagnostics::report_leaks; use rustc_data_structures::fx::FxHashSet; @@ -18,7 +15,7 @@ use rustc_hir::def_id::DefId; use rustc_middle::ty::{ self, layout::{LayoutCx, LayoutOf}, - TyCtxt, + Ty, TyCtxt, }; use rustc_target::spec::abi::Abi; diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index 3cee4df5885..d9b4363d604 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -3,8 +3,6 @@ use std::iter; use std::num::NonZero; use std::time::Duration; -use log::trace; - use rustc_apfloat::ieee::{Double, Single}; use rustc_apfloat::Float; use rustc_hir::def::{DefKind, Namespace}; diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs index 68c9a7660eb..3fe127f9732 100644 --- a/src/tools/miri/src/intptrcast.rs +++ b/src/tools/miri/src/intptrcast.rs @@ -2,7 +2,6 @@ use std::cell::RefCell; use std::cmp::max; use std::collections::hash_map::Entry; -use log::trace; use rand::Rng; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 305c71fb0f9..c567949102f 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -63,6 +63,8 @@ extern crate rustc_middle; extern crate rustc_session; extern crate rustc_span; extern crate rustc_target; +#[macro_use] +extern crate tracing; // Necessary to pull in object code as the rest of the rustc crates are shipped only as rmeta // files. diff --git a/src/tools/miri/src/operator.rs b/src/tools/miri/src/operator.rs index 6f19dead2e9..d99be39177b 100644 --- a/src/tools/miri/src/operator.rs +++ b/src/tools/miri/src/operator.rs @@ -1,7 +1,5 @@ use std::iter; -use log::trace; - use rand::{seq::IteratorRandom, Rng}; use rustc_apfloat::{Float, FloatConvert}; use rustc_middle::mir; diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs index bf90d1468bb..0645c1f176e 100644 --- a/src/tools/miri/src/shims/foreign_items.rs +++ b/src/tools/miri/src/shims/foreign_items.rs @@ -1,7 +1,5 @@ use std::{collections::hash_map::Entry, io::Write, iter, path::Path}; -use log::trace; - use rustc_apfloat::Float; use rustc_ast::expand::allocator::AllocatorKind; use rustc_hir::{ diff --git a/src/tools/miri/src/shims/intrinsics/mod.rs b/src/tools/miri/src/shims/intrinsics/mod.rs index df2761bfaf4..602e8b31b01 100644 --- a/src/tools/miri/src/shims/intrinsics/mod.rs +++ b/src/tools/miri/src/shims/intrinsics/mod.rs @@ -3,8 +3,6 @@ mod simd; use std::iter; -use log::trace; - use rand::Rng; use rustc_apfloat::{Float, Round}; use rustc_middle::ty::layout::LayoutOf; diff --git a/src/tools/miri/src/shims/panic.rs b/src/tools/miri/src/shims/panic.rs index 28652c25c24..4c054d8dc8a 100644 --- a/src/tools/miri/src/shims/panic.rs +++ b/src/tools/miri/src/shims/panic.rs @@ -11,8 +11,6 @@ //! gets popped *during unwinding*, we take the panic payload and store it according to the extra //! metadata we remembered when pushing said frame. -use log::trace; - use rustc_ast::Mutability; use rustc_middle::{mir, ty}; use rustc_span::Symbol; diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs index b319516c25b..84c1feb88e9 100644 --- a/src/tools/miri/src/shims/tls.rs +++ b/src/tools/miri/src/shims/tls.rs @@ -4,8 +4,6 @@ use std::collections::btree_map::Entry as BTreeEntry; use std::collections::BTreeMap; use std::task::Poll; -use log::trace; - use rustc_middle::ty; use rustc_target::abi::{HasDataLayout, Size}; use rustc_target::spec::abi::Abi; diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs index 35036ce078d..b5cd18396a2 100644 --- a/src/tools/miri/src/shims/unix/foreign_items.rs +++ b/src/tools/miri/src/shims/unix/foreign_items.rs @@ -1,8 +1,6 @@ use std::ffi::OsStr; use std::str; -use log::trace; - use rustc_middle::ty::layout::LayoutOf; use rustc_span::Symbol; use rustc_target::abi::{Align, Size}; @@ -262,6 +260,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { "mmap" => { let [addr, length, prot, flags, fd, offset] = this.check_shim(abi, Abi::C {unwind: false}, link_name, args)?; + let offset = this.read_scalar(offset)?.to_int(this.libc_ty_layout("off_t").size)?; let ptr = this.mmap(addr, length, prot, flags, fd, offset)?; this.write_scalar(ptr, dest)?; } @@ -711,6 +710,25 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } } + "sched_getaffinity" => { + // FreeBSD supports it as well since 13.1 (as a wrapper of cpuset_getaffinity) + if !matches!(&*this.tcx.sess.target.os, "linux" | "freebsd") { + throw_unsup_format!( + "`sched_getaffinity` is not supported on {}", + this.tcx.sess.target.os + ); + } + let [pid, cpusetsize, mask] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + this.read_scalar(pid)?.to_i32()?; + this.read_target_usize(cpusetsize)?; + this.deref_pointer_as(mask, this.libc_ty_layout("cpu_set_t"))?; + // FIXME: we just return an error; `num_cpus` then falls back to `sysconf`. + let einval = this.eval_libc("EINVAL"); + this.set_last_error(einval)?; + this.write_scalar(Scalar::from_i32(-1), dest)?; + } + // Platform-specific shims _ => { let target_os = &*this.tcx.sess.target.os; diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs index 53f975baa89..b141ca4a019 100644 --- a/src/tools/miri/src/shims/unix/fs.rs +++ b/src/tools/miri/src/shims/unix/fs.rs @@ -8,8 +8,6 @@ use std::io::{self, ErrorKind, IsTerminal, Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use std::time::SystemTime; -use log::trace; - use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty::TyCtxt; use rustc_target::abi::Size; diff --git a/src/tools/miri/src/shims/unix/linux/foreign_items.rs b/src/tools/miri/src/shims/unix/linux/foreign_items.rs index 6937e0f089e..b9215129674 100644 --- a/src/tools/miri/src/shims/unix/linux/foreign_items.rs +++ b/src/tools/miri/src/shims/unix/linux/foreign_items.rs @@ -9,6 +9,7 @@ use shims::unix::fs::EvalContextExt as _; use shims::unix::linux::fd::EvalContextExt as _; use shims::unix::linux::mem::EvalContextExt as _; use shims::unix::linux::sync::futex; +use shims::unix::mem::EvalContextExt as _; use shims::unix::sync::EvalContextExt as _; use shims::unix::thread::EvalContextExt as _; @@ -43,6 +44,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let result = this.linux_readdir64(dirp)?; this.write_scalar(result, dest)?; } + "mmap64" => { + let [addr, length, prot, flags, fd, offset] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let offset = this.read_scalar(offset)?.to_i64()?; + let ptr = this.mmap(addr, length, prot, flags, fd, offset.into())?; + this.write_scalar(ptr, dest)?; + } + // Linux-only "sync_file_range" => { let [fd, offset, nbytes, flags] = @@ -197,17 +206,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; getrandom(this, ptr, len, flags, dest)?; } - "sched_getaffinity" => { - let [pid, cpusetsize, mask] = - this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - this.read_scalar(pid)?.to_i32()?; - this.read_target_usize(cpusetsize)?; - this.deref_pointer_as(mask, this.libc_ty_layout("cpu_set_t"))?; - // FIXME: we just return an error; `num_cpus` then falls back to `sysconf`. - let einval = this.eval_libc("EINVAL"); - this.set_last_error(einval)?; - this.write_scalar(Scalar::from_i32(-1), dest)?; - } // Incomplete shims that we "stub out" just to get pre-main initialization code to work. // These shims are enabled only when the caller is in the standard library. diff --git a/src/tools/miri/src/shims/unix/mem.rs b/src/tools/miri/src/shims/unix/mem.rs index d7dc17fa89f..d3470893dbb 100644 --- a/src/tools/miri/src/shims/unix/mem.rs +++ b/src/tools/miri/src/shims/unix/mem.rs @@ -26,7 +26,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { prot: &OpTy<'tcx, Provenance>, flags: &OpTy<'tcx, Provenance>, fd: &OpTy<'tcx, Provenance>, - offset: &OpTy<'tcx, Provenance>, + offset: i128, ) -> InterpResult<'tcx, Scalar> { let this = self.eval_context_mut(); @@ -36,7 +36,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let prot = this.read_scalar(prot)?.to_i32()?; let flags = this.read_scalar(flags)?.to_i32()?; let fd = this.read_scalar(fd)?.to_i32()?; - let offset = this.read_target_usize(offset)?; let map_private = this.eval_libc_i32("MAP_PRIVATE"); let map_anonymous = this.eval_libc_i32("MAP_ANONYMOUS"); diff --git a/src/tools/miri/src/shims/x86/avx.rs b/src/tools/miri/src/shims/x86/avx.rs new file mode 100644 index 00000000000..65de1607595 --- /dev/null +++ b/src/tools/miri/src/shims/x86/avx.rs @@ -0,0 +1,417 @@ +use rustc_apfloat::{ieee::Double, ieee::Single}; +use rustc_middle::mir; +use rustc_middle::ty::layout::LayoutOf as _; +use rustc_middle::ty::Ty; +use rustc_span::Symbol; +use rustc_target::spec::abi::Abi; + +use super::{ + bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, horizontal_bin_op, + round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, FloatBinOp, FloatUnaryOp, +}; +use crate::*; +use shims::foreign_items::EmulateForeignItemResult; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_avx_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateForeignItemResult> { + let this = self.eval_context_mut(); + this.expect_target_feature_for_intrinsic(link_name, "avx")?; + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx.").unwrap(); + + match unprefixed_name { + // Used to implement _mm256_min_ps and _mm256_max_ps functions. + // Note that the semantics are a bit different from Rust simd_min + // and simd_max intrinsics regarding handling of NaN and -0.0: Rust + // matches the IEEE min/max operations, while x86 has different + // semantics. + "min.ps.256" | "max.ps.256" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let which = match unprefixed_name { + "min.ps.256" => FloatBinOp::Min, + "max.ps.256" => FloatBinOp::Max, + _ => unreachable!(), + }; + + bin_op_simd_float_all::(this, which, left, right, dest)?; + } + // Used to implement _mm256_min_pd and _mm256_max_pd functions. + "min.pd.256" | "max.pd.256" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let which = match unprefixed_name { + "min.pd.256" => FloatBinOp::Min, + "max.pd.256" => FloatBinOp::Max, + _ => unreachable!(), + }; + + bin_op_simd_float_all::(this, which, left, right, dest)?; + } + // Used to implement the _mm256_round_ps function. + // Rounds the elements of `op` according to `rounding`. + "round.ps.256" => { + let [op, rounding] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + round_all::(this, op, rounding, dest)?; + } + // Used to implement the _mm256_round_pd function. + // Rounds the elements of `op` according to `rounding`. + "round.pd.256" => { + let [op, rounding] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + round_all::(this, op, rounding, dest)?; + } + // Used to implement _mm256_{sqrt,rcp,rsqrt}_ps functions. + // Performs the operations on all components of `op`. + "sqrt.ps.256" | "rcp.ps.256" | "rsqrt.ps.256" => { + let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let which = match unprefixed_name { + "sqrt.ps.256" => FloatUnaryOp::Sqrt, + "rcp.ps.256" => FloatUnaryOp::Rcp, + "rsqrt.ps.256" => FloatUnaryOp::Rsqrt, + _ => unreachable!(), + }; + + unary_op_ps(this, which, op, dest)?; + } + // Used to implement the _mm256_dp_ps function. + "dp.ps.256" => { + let [left, right, imm] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + conditional_dot_product(this, left, right, imm, dest)?; + } + // Used to implement the _mm256_h{add,sub}_p{s,d} functions. + // Horizontally add/subtract adjacent floating point values + // in `left` and `right`. + "hadd.ps.256" | "hadd.pd.256" | "hsub.ps.256" | "hsub.pd.256" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let which = match unprefixed_name { + "hadd.ps.256" | "hadd.pd.256" => mir::BinOp::Add, + "hsub.ps.256" | "hsub.pd.256" => mir::BinOp::Sub, + _ => unreachable!(), + }; + + horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?; + } + // Used to implement the _mm256_cmp_ps function. + // Performs a comparison operation on each component of `left` + // and `right`. For each component, returns 0 if false or u32::MAX + // if true. + "cmp.ps.256" => { + let [left, right, imm] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let which = + FloatBinOp::cmp_from_imm(this, this.read_scalar(imm)?.to_i8()?, link_name)?; + + bin_op_simd_float_all::(this, which, left, right, dest)?; + } + // Used to implement the _mm256_cmp_pd function. + // Performs a comparison operation on each component of `left` + // and `right`. For each component, returns 0 if false or u64::MAX + // if true. + "cmp.pd.256" => { + let [left, right, imm] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let which = + FloatBinOp::cmp_from_imm(this, this.read_scalar(imm)?.to_i8()?, link_name)?; + + bin_op_simd_float_all::(this, which, left, right, dest)?; + } + // Used to implement the _mm256_cvtps_epi32, _mm256_cvttps_epi32, _mm256_cvtpd_epi32 + // and _mm256_cvttpd_epi32 functions. + // Converts packed f32/f64 to packed i32. + "cvt.ps2dq.256" | "cvtt.ps2dq.256" | "cvt.pd2dq.256" | "cvtt.pd2dq.256" => { + let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let rnd = match unprefixed_name { + // "current SSE rounding mode", assume nearest + "cvt.ps2dq.256" | "cvt.pd2dq.256" => rustc_apfloat::Round::NearestTiesToEven, + // always truncate + "cvtt.ps2dq.256" | "cvtt.pd2dq.256" => rustc_apfloat::Round::TowardZero, + _ => unreachable!(), + }; + + convert_float_to_int(this, op, rnd, dest)?; + } + // Used to implement the _mm_permutevar_ps and _mm256_permutevar_ps functions. + // Shuffles 32-bit floats from `data` using `control` as control. Each 128-bit + // chunk is shuffled independently: this means that we view the vector as a + // sequence of 4-element arrays, and we shuffle each of these arrays, where + // `control` determines which element of the current `data` array is written. + "vpermilvar.ps" | "vpermilvar.ps.256" => { + let [data, control] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (data, data_len) = this.operand_to_simd(data)?; + let (control, control_len) = this.operand_to_simd(control)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, data_len); + assert_eq!(dest_len, control_len); + + for i in 0..dest_len { + let control = this.project_index(&control, i)?; + + // Each 128-bit chunk is shuffled independently. Since each chunk contains + // four 32-bit elements, only two bits from `control` are used. To read the + // value from the current chunk, add the destination index truncated to a multiple + // of 4. + let chunk_base = i & !0b11; + let src_i = u64::from(this.read_scalar(&control)?.to_u32()? & 0b11) + .checked_add(chunk_base) + .unwrap(); + + this.copy_op( + &this.project_index(&data, src_i)?, + &this.project_index(&dest, i)?, + )?; + } + } + // Used to implement the _mm_permutevar_pd and _mm256_permutevar_pd functions. + // Shuffles 64-bit floats from `left` using `right` as control. Each 128-bit + // chunk is shuffled independently: this means that we view the vector as + // a sequence of 2-element arrays, and we shuffle each of these arrays, + // where `right` determines which element of the current `left` array is + // written. + "vpermilvar.pd" | "vpermilvar.pd.256" => { + let [data, control] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (data, data_len) = this.operand_to_simd(data)?; + let (control, control_len) = this.operand_to_simd(control)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, data_len); + assert_eq!(dest_len, control_len); + + for i in 0..dest_len { + let control = this.project_index(&control, i)?; + + // Each 128-bit chunk is shuffled independently. Since each chunk contains + // two 64-bit elements, only the second bit from `control` is used (yes, the + // second instead of the first, ask Intel). To read the value from the current + // chunk, add the destination index truncated to a multiple of 2. + let chunk_base = i & !1; + let src_i = ((this.read_scalar(&control)?.to_u64()? >> 1) & 1) + .checked_add(chunk_base) + .unwrap(); + + this.copy_op( + &this.project_index(&data, src_i)?, + &this.project_index(&dest, i)?, + )?; + } + } + // Used to implement the _mm256_permute2f128_ps, _mm256_permute2f128_pd and + // _mm256_permute2f128_si256 functions. Regardless of the suffix in the name + // thay all can be considered to operate on vectors of 128-bit elements. + // For each 128-bit element of `dest`, copies one from `left`, `right` or + // zero, according to `imm`. + "vperm2f128.ps.256" | "vperm2f128.pd.256" | "vperm2f128.si.256" => { + let [left, right, imm] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + assert_eq!(dest.layout, left.layout); + assert_eq!(dest.layout, right.layout); + assert_eq!(dest.layout.size.bits(), 256); + + // Transmute to `[u128; 2]` to process each 128-bit chunk independently. + let u128x2_layout = + this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, 2))?; + let left = left.transmute(u128x2_layout, this)?; + let right = right.transmute(u128x2_layout, this)?; + let dest = dest.transmute(u128x2_layout, this)?; + + let imm = this.read_scalar(imm)?.to_u8()?; + + for i in 0..2 { + let dest = this.project_index(&dest, i)?; + + let imm = match i { + 0 => imm & 0xF, + 1 => imm >> 4, + _ => unreachable!(), + }; + if imm & 0b100 != 0 { + this.write_scalar(Scalar::from_u128(0), &dest)?; + } else { + let src = match imm { + 0b00 => this.project_index(&left, 0)?, + 0b01 => this.project_index(&left, 1)?, + 0b10 => this.project_index(&right, 0)?, + 0b11 => this.project_index(&right, 1)?, + _ => unreachable!(), + }; + this.copy_op(&src, &dest)?; + } + } + } + // Used to implement the _mm_maskload_ps, _mm_maskload_pd, _mm256_maskload_ps + // and _mm256_maskload_pd functions. + // For the element `i`, if the high bit of the `i`-th element of `mask` + // is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is + // loaded. + "maskload.ps" | "maskload.pd" | "maskload.ps.256" | "maskload.pd.256" => { + let [ptr, mask] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + mask_load(this, ptr, mask, dest)?; + } + // Used to implement the _mm_maskstore_ps, _mm_maskstore_pd, _mm256_maskstore_ps + // and _mm256_maskstore_pd functions. + // For the element `i`, if the high bit of the element `i`-th of `mask` + // is one, it is stored into `ptr.wapping_add(i)`. + // Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores. + "maskstore.ps" | "maskstore.pd" | "maskstore.ps.256" | "maskstore.pd.256" => { + let [ptr, mask, value] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + mask_store(this, ptr, mask, value)?; + } + // Used to implement the _mm256_lddqu_si256 function. + // Reads a 256-bit vector from an unaligned pointer. This intrinsic + // is expected to perform better than a regular unaligned read when + // the data crosses a cache line, but for Miri this is just a regular + // unaligned read. + "ldu.dq.256" => { + let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let src_ptr = this.read_pointer(src_ptr)?; + let dest = dest.force_mplace(this)?; + + // Unaligned copy, which is what we want. + this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?; + } + // Used to implement the _mm256_testz_si256, _mm256_testc_si256 and + // _mm256_testnzc_si256 functions. + // Tests `op & mask == 0`, `op & mask == mask` or + // `op & mask != 0 && op & mask != mask` + "ptestz.256" | "ptestc.256" | "ptestnzc.256" => { + let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (all_zero, masked_set) = test_bits_masked(this, op, mask)?; + let res = match unprefixed_name { + "ptestz.256" => all_zero, + "ptestc.256" => masked_set, + "ptestnzc.256" => !all_zero && !masked_set, + _ => unreachable!(), + }; + + this.write_scalar(Scalar::from_i32(res.into()), dest)?; + } + // Used to implement the _mm256_testz_pd, _mm256_testc_pd, _mm256_testnzc_pd + // _mm_testz_pd, _mm_testc_pd, _mm_testnzc_pd, _mm256_testz_ps, + // _mm256_testc_ps, _mm256_testnzc_ps, _mm_testz_ps, _mm_testc_ps and + // _mm_testnzc_ps functions. + // Calculates two booleans: + // `direct`, which is true when the highest bit of each element of `op & mask` is zero. + // `negated`, which is true when the highest bit of each element of `!op & mask` is zero. + // Return `direct` (testz), `negated` (testc) or `!direct & !negated` (testnzc) + "vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestz.pd" | "vtestc.pd" + | "vtestnzc.pd" | "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256" + | "vtestz.ps" | "vtestc.ps" | "vtestnzc.ps" => { + let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (direct, negated) = test_high_bits_masked(this, op, mask)?; + let res = match unprefixed_name { + "vtestz.pd.256" | "vtestz.pd" | "vtestz.ps.256" | "vtestz.ps" => direct, + "vtestc.pd.256" | "vtestc.pd" | "vtestc.ps.256" | "vtestc.ps" => negated, + "vtestnzc.pd.256" | "vtestnzc.pd" | "vtestnzc.ps.256" | "vtestnzc.ps" => + !direct && !negated, + _ => unreachable!(), + }; + + this.write_scalar(Scalar::from_i32(res.into()), dest)?; + } + _ => return Ok(EmulateForeignItemResult::NotSupported), + } + Ok(EmulateForeignItemResult::NeedsJumping) + } +} + +/// Conditionally loads from `ptr` according the high bit of each +/// element of `mask`. `ptr` does not need to be aligned. +fn mask_load<'tcx>( + this: &mut crate::MiriInterpCx<'_, 'tcx>, + ptr: &OpTy<'tcx, Provenance>, + mask: &OpTy<'tcx, Provenance>, + dest: &PlaceTy<'tcx, Provenance>, +) -> InterpResult<'tcx, ()> { + let (mask, mask_len) = this.operand_to_simd(mask)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, mask_len); + + let mask_item_size = mask.layout.field(this, 0).size; + let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap(); + + let ptr = this.read_pointer(ptr)?; + for i in 0..dest_len { + let mask = this.project_index(&mask, i)?; + let dest = this.project_index(&dest, i)?; + + if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 { + // Size * u64 is implemented as always checked + #[allow(clippy::arithmetic_side_effects)] + let ptr = ptr.wrapping_offset(dest.layout.size * i, &this.tcx); + // Unaligned copy, which is what we want. + this.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?; + } else { + this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?; + } + } + + Ok(()) +} + +/// Conditionally stores into `ptr` according the high bit of each +/// element of `mask`. `ptr` does not need to be aligned. +fn mask_store<'tcx>( + this: &mut crate::MiriInterpCx<'_, 'tcx>, + ptr: &OpTy<'tcx, Provenance>, + mask: &OpTy<'tcx, Provenance>, + value: &OpTy<'tcx, Provenance>, +) -> InterpResult<'tcx, ()> { + let (mask, mask_len) = this.operand_to_simd(mask)?; + let (value, value_len) = this.operand_to_simd(value)?; + + assert_eq!(value_len, mask_len); + + let mask_item_size = mask.layout.field(this, 0).size; + let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap(); + + let ptr = this.read_pointer(ptr)?; + for i in 0..value_len { + let mask = this.project_index(&mask, i)?; + let value = this.project_index(&value, i)?; + + if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 { + // Size * u64 is implemented as always checked + #[allow(clippy::arithmetic_side_effects)] + let ptr = ptr.wrapping_offset(value.layout.size * i, &this.tcx); + // Unaligned copy, which is what we want. + this.mem_copy(value.ptr(), ptr, value.layout.size, /*nonoverlapping*/ true)?; + } + } + + Ok(()) +} diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index b24ea8aec84..9cfee20014f 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -1,6 +1,8 @@ use rand::Rng as _; -use rustc_apfloat::{ieee::Single, Float as _}; +use rustc_apfloat::{ieee::Single, Float}; +use rustc_middle::ty::layout::LayoutOf as _; +use rustc_middle::ty::Ty; use rustc_middle::{mir, ty}; use rustc_span::Symbol; use rustc_target::abi::Size; @@ -11,6 +13,7 @@ use helpers::bool_to_simd_element; use shims::foreign_items::EmulateForeignItemResult; mod aesni; +mod avx; mod sse; mod sse2; mod sse3; @@ -115,6 +118,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this, link_name, abi, args, dest, ); } + name if name.starts_with("avx.") => { + return avx::EvalContextExt::emulate_x86_avx_intrinsic( + this, link_name, abi, args, dest, + ); + } _ => return Ok(EmulateForeignItemResult::NotSupported), } @@ -296,10 +304,7 @@ fn bin_op_simd_float_first<'tcx, F: rustc_apfloat::Float>( this.write_scalar(res0, &this.project_index(&dest, 0)?)?; for i in 1..dest_len { - this.copy_op( - &this.project_index(&left, i)?, - &this.project_index(&dest, i)?, - )?; + this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?; } Ok(()) @@ -420,10 +425,7 @@ fn unary_op_ss<'tcx>( this.write_scalar(res0, &this.project_index(&dest, 0)?)?; for i in 1..dest_len { - this.copy_op( - &this.project_index(&op, i)?, - &this.project_index(&dest, i)?, - )?; + this.copy_op(&this.project_index(&op, i)?, &this.project_index(&dest, i)?)?; } Ok(()) @@ -479,10 +481,7 @@ fn round_first<'tcx, F: rustc_apfloat::Float>( )?; for i in 1..dest_len { - this.copy_op( - &this.project_index(&left, i)?, - &this.project_index(&dest, i)?, - )?; + this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?; } Ok(()) @@ -572,8 +571,65 @@ fn convert_float_to_int<'tcx>( Ok(()) } +/// Splits `left`, `right` and `dest` (which must be SIMD vectors) +/// into 128-bit chuncks. +/// +/// `left`, `right` and `dest` cannot have different types. +/// +/// Returns a tuple where: +/// * The first element is the number of 128-bit chunks (let's call it `N`). +/// * The second element is the number of elements per chunk (let's call it `M`). +/// * The third element is the `left` vector split into chunks, i.e, it's +/// type is `[[T; M]; N]`. +/// * The fourth element is the `right` vector split into chunks. +/// * The fifth element is the `dest` vector split into chunks. +fn split_simd_to_128bit_chunks<'tcx>( + this: &mut crate::MiriInterpCx<'_, 'tcx>, + left: &OpTy<'tcx, Provenance>, + right: &OpTy<'tcx, Provenance>, + dest: &PlaceTy<'tcx, Provenance>, +) -> InterpResult< + 'tcx, + (u64, u64, MPlaceTy<'tcx, Provenance>, MPlaceTy<'tcx, Provenance>, MPlaceTy<'tcx, Provenance>), +> { + assert_eq!(dest.layout, left.layout); + assert_eq!(dest.layout, right.layout); + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + + assert_eq!(dest.layout.size.bits() % 128, 0); + let num_chunks = dest.layout.size.bits() / 128; + assert_eq!(dest_len.checked_rem(num_chunks), Some(0)); + let items_per_chunk = dest_len.checked_div(num_chunks).unwrap(); + + // Transmute to `[[T; items_per_chunk]; num_chunks]` + let element_layout = left.layout.field(this, 0); + let chunked_layout = this.layout_of(Ty::new_array( + this.tcx.tcx, + Ty::new_array(this.tcx.tcx, element_layout.ty, items_per_chunk), + num_chunks, + ))?; + let left = left.transmute(chunked_layout, this)?; + let right = right.transmute(chunked_layout, this)?; + let dest = dest.transmute(chunked_layout, this)?; + + Ok((num_chunks, items_per_chunk, left, right, dest)) +} + /// Horizontaly performs `which` operation on adjacent values of /// `left` and `right` SIMD vectors and stores the result in `dest`. +/// "Horizontal" means that the i-th output element is calculated +/// from the elements 2*i and 2*i+1 of the concatenation of `left` and +/// `right`. +/// +/// Each 128-bit chunk is treated independently (i.e., the value for +/// the is i-th 128-bit chunk of `dest` is calculated with the i-th +/// 128-bit chunks of `left` and `right`). fn horizontal_bin_op<'tcx>( this: &mut crate::MiriInterpCx<'_, 'tcx>, which: mir::BinOp, @@ -582,32 +638,34 @@ fn horizontal_bin_op<'tcx>( right: &OpTy<'tcx, Provenance>, dest: &PlaceTy<'tcx, Provenance>, ) -> InterpResult<'tcx, ()> { - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; + let (num_chunks, items_per_chunk, left, right, dest) = + split_simd_to_128bit_chunks(this, left, right, dest)?; - assert_eq!(dest_len, left_len); - assert_eq!(dest_len, right_len); - assert_eq!(dest_len % 2, 0); + let middle = items_per_chunk / 2; + for i in 0..num_chunks { + let left = this.project_index(&left, i)?; + let right = this.project_index(&right, i)?; + let dest = this.project_index(&dest, i)?; - let middle = dest_len / 2; - for i in 0..dest_len { - // `i` is the index in `dest` - // `j` is the index of the 2-item chunk in `src` - let (j, src) = - if i < middle { (i, &left) } else { (i.checked_sub(middle).unwrap(), &right) }; - // `base_i` is the index of the first item of the 2-item chunk in `src` - let base_i = j.checked_mul(2).unwrap(); - let lhs = this.read_immediate(&this.project_index(src, base_i)?)?; - let rhs = this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?; + for j in 0..items_per_chunk { + // `j` is the index in `dest` + // `k` is the index of the 2-item chunk in `src` + let (k, src) = + if j < middle { (j, &left) } else { (j.checked_sub(middle).unwrap(), &right) }; + // `base_i` is the index of the first item of the 2-item chunk in `src` + let base_i = k.checked_mul(2).unwrap(); + let lhs = this.read_immediate(&this.project_index(src, base_i)?)?; + let rhs = + this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?; - let res = if saturating { - Immediate::from(this.saturating_arith(which, &lhs, &rhs)?) - } else { - *this.wrapping_binary_op(which, &lhs, &rhs)? - }; + let res = if saturating { + Immediate::from(this.saturating_arith(which, &lhs, &rhs)?) + } else { + *this.wrapping_binary_op(which, &lhs, &rhs)? + }; - this.write_immediate(res, &this.project_index(&dest, i)?)?; + this.write_immediate(res, &this.project_index(&dest, j)?)?; + } } Ok(()) @@ -617,6 +675,10 @@ fn horizontal_bin_op<'tcx>( /// `left` and `right` using the high 4 bits in `imm`, sums the calculated /// products (up to 4), and conditionally stores the sum in `dest` using /// the low 4 bits of `imm`. +/// +/// Each 128-bit chunk is treated independently (i.e., the value for +/// the is i-th 128-bit chunk of `dest` is calculated with the i-th +/// 128-bit blocks of `left` and `right`). fn conditional_dot_product<'tcx>( this: &mut crate::MiriInterpCx<'_, 'tcx>, left: &OpTy<'tcx, Provenance>, @@ -624,39 +686,43 @@ fn conditional_dot_product<'tcx>( imm: &OpTy<'tcx, Provenance>, dest: &PlaceTy<'tcx, Provenance>, ) -> InterpResult<'tcx, ()> { - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; + let (num_chunks, items_per_chunk, left, right, dest) = + split_simd_to_128bit_chunks(this, left, right, dest)?; - assert_eq!(left_len, right_len); - assert!(dest_len <= 4); + let element_layout = left.layout.field(this, 0).field(this, 0); + assert!(items_per_chunk <= 4); - let imm = this.read_scalar(imm)?.to_u8()?; + // `imm` is a `u8` for SSE4.1 or an `i32` for AVX :/ + let imm = this.read_scalar(imm)?.to_uint(imm.layout.size)?; - let element_layout = left.layout.field(this, 0); - - // Calculate dot product - // Elements are floating point numbers, but we can use `from_int` - // because the representation of 0.0 is all zero bits. - let mut sum = ImmTy::from_int(0u8, element_layout); - for i in 0..left_len { - if imm & (1 << i.checked_add(4).unwrap()) != 0 { - let left = this.read_immediate(&this.project_index(&left, i)?)?; - let right = this.read_immediate(&this.project_index(&right, i)?)?; - - let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?; - sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?; - } - } - - // Write to destination (conditioned to imm) - for i in 0..dest_len { + for i in 0..num_chunks { + let left = this.project_index(&left, i)?; + let right = this.project_index(&right, i)?; let dest = this.project_index(&dest, i)?; - if imm & (1 << i) != 0 { - this.write_immediate(*sum, &dest)?; - } else { - this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?; + // Calculate dot product + // Elements are floating point numbers, but we can use `from_int` + // for the initial value because the representation of 0.0 is all zero bits. + let mut sum = ImmTy::from_int(0u8, element_layout); + for j in 0..items_per_chunk { + if imm & (1 << j.checked_add(4).unwrap()) != 0 { + let left = this.read_immediate(&this.project_index(&left, j)?)?; + let right = this.read_immediate(&this.project_index(&right, j)?)?; + + let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?; + sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?; + } + } + + // Write to destination (conditioned to imm) + for j in 0..items_per_chunk { + let dest = this.project_index(&dest, j)?; + + if imm & (1 << j) != 0 { + this.write_immediate(*sum, &dest)?; + } else { + this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?; + } } } @@ -693,3 +759,36 @@ fn test_bits_masked<'tcx>( Ok((all_zero, masked_set)) } + +/// Calculates two booleans. +/// +/// The first is true when the highest bit of each element of `op & mask` is zero. +/// The second is true when the highest bit of each element of `!op & mask` is zero. +fn test_high_bits_masked<'tcx>( + this: &crate::MiriInterpCx<'_, 'tcx>, + op: &OpTy<'tcx, Provenance>, + mask: &OpTy<'tcx, Provenance>, +) -> InterpResult<'tcx, (bool, bool)> { + assert_eq!(op.layout, mask.layout); + + let (op, op_len) = this.operand_to_simd(op)?; + let (mask, mask_len) = this.operand_to_simd(mask)?; + + assert_eq!(op_len, mask_len); + + let high_bit_offset = op.layout.field(this, 0).size.bits().checked_sub(1).unwrap(); + + let mut direct = true; + let mut negated = true; + for i in 0..op_len { + let op = this.project_index(&op, i)?; + let mask = this.project_index(&mask, i)?; + + let op = this.read_scalar(&op)?.to_uint(op.layout.size)?; + let mask = this.read_scalar(&mask)?.to_uint(mask.layout.size)?; + direct &= (op & mask) >> high_bit_offset == 0; + negated &= (!op & mask) >> high_bit_offset == 0; + } + + Ok((direct, negated)) +} diff --git a/src/tools/miri/src/shims/x86/sse.rs b/src/tools/miri/src/shims/x86/sse.rs index 9fb947cb2a3..da0db92738f 100644 --- a/src/tools/miri/src/shims/x86/sse.rs +++ b/src/tools/miri/src/shims/x86/sse.rs @@ -208,10 +208,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_immediate(*res0, &dest0)?; for i in 1..dest_len { - this.copy_op( - &this.project_index(&left, i)?, - &this.project_index(&dest, i)?, - )?; + this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?; } } _ => return Ok(EmulateForeignItemResult::NotSupported), diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs index e5c8267320a..b34b93e3739 100644 --- a/src/tools/miri/src/shims/x86/sse2.rs +++ b/src/tools/miri/src/shims/x86/sse2.rs @@ -440,10 +440,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_scalar(res0, &this.project_index(&dest, 0)?)?; for i in 1..dest_len { - this.copy_op( - &this.project_index(&op, i)?, - &this.project_index(&dest, i)?, - )?; + this.copy_op(&this.project_index(&op, i)?, &this.project_index(&dest, i)?)?; } } // Used to implement _mm_sqrt_pd functions. @@ -580,10 +577,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: // Copy remianing from `left` for i in 1..dest_len { - this.copy_op( - &this.project_index(&left, i)?, - &this.project_index(&dest, i)?, - )?; + this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?; } } // Used to implement the `_mm_pause` function. diff --git a/src/tools/miri/src/shims/x86/sse41.rs b/src/tools/miri/src/shims/x86/sse41.rs index 2abd10fa7a7..32b1fe43c58 100644 --- a/src/tools/miri/src/shims/x86/sse41.rs +++ b/src/tools/miri/src/shims/x86/sse41.rs @@ -57,10 +57,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_immediate(*src_value, &dest)?; } else { // copy from `left` - this.copy_op( - &this.project_index(&left, i)?, - &dest, - )?; + this.copy_op(&this.project_index(&left, i)?, &dest)?; } } } diff --git a/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs b/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs index 7097aa0c43a..428f371ca51 100644 --- a/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs +++ b/src/tools/miri/tests/fail/enum-set-discriminant-niche-variant-wrong.rs @@ -4,11 +4,11 @@ use std::intrinsics::mir::*; use std::num::NonZeroI32; -// We define our own option type so that we can control the varian indices. +// We define our own option type so that we can control the variant indices. #[allow(unused)] enum Option { - None, - Some(T), + None, // variant 0 + Some(T), // variant 1 } use Option::*; diff --git a/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs b/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs index 44604074982..fef5a6cddb9 100644 --- a/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs +++ b/src/tools/miri/tests/fail/issue-miri-3288-ice-symbolic-alignment-extern-static.rs @@ -4,8 +4,7 @@ extern "C" { static _dispatch_queue_attr_concurrent: [u8; 0]; } -static DISPATCH_QUEUE_CONCURRENT: &'static [u8; 0] = - unsafe { &_dispatch_queue_attr_concurrent }; +static DISPATCH_QUEUE_CONCURRENT: &'static [u8; 0] = unsafe { &_dispatch_queue_attr_concurrent }; fn main() { let _val = *DISPATCH_QUEUE_CONCURRENT; //~ERROR: is not supported diff --git a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs index b0325f7d78e..f362caa11dc 100644 --- a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs +++ b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond.rs @@ -22,7 +22,7 @@ fn test_timed_wait_timeout(clock_id: i32) { let mut now_mu: MaybeUninit = MaybeUninit::uninit(); assert_eq!(libc::clock_gettime(clock_id, now_mu.as_mut_ptr()), 0); let now = now_mu.assume_init(); - // Waiting for a second... mostly because waiting less requires mich more tricky arithmetic. + // Waiting for a second... mostly because waiting less requires much more tricky arithmetic. // FIXME: wait less. let timeout = libc::timespec { tv_sec: now.tv_sec + 1, tv_nsec: now.tv_nsec }; diff --git a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs index 103ce44006d..66c0895a5da 100644 --- a/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs +++ b/src/tools/miri/tests/pass-dep/concurrency/libc_pthread_cond_isolated.rs @@ -21,7 +21,7 @@ fn test_timed_wait_timeout(clock_id: i32) { let mut now_mu: MaybeUninit = MaybeUninit::uninit(); assert_eq!(libc::clock_gettime(clock_id, now_mu.as_mut_ptr()), 0); let now = now_mu.assume_init(); - // Waiting for a second... mostly because waiting less requires mich more tricky arithmetic. + // Waiting for a second... mostly because waiting less requires much more tricky arithmetic. // FIXME: wait less. let timeout = libc::timespec { tv_sec: now.tv_sec + 1, tv_nsec: now.tv_nsec }; diff --git a/src/tools/miri/tests/pass-dep/shims/mmap.rs b/src/tools/miri/tests/pass-dep/shims/mmap.rs index e19f54d0687..7bbb9dd53cb 100644 --- a/src/tools/miri/tests/pass-dep/shims/mmap.rs +++ b/src/tools/miri/tests/pass-dep/shims/mmap.rs @@ -5,16 +5,25 @@ use std::io::Error; use std::{ptr, slice}; -fn test_mmap() { +fn test_mmap( + mmap: unsafe extern "C" fn( + *mut libc::c_void, + libc::size_t, + libc::c_int, + libc::c_int, + libc::c_int, + Offset, + ) -> *mut libc::c_void, +) { let page_size = page_size::get(); let ptr = unsafe { - libc::mmap( + mmap( ptr::null_mut(), page_size, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, - 0, + Default::default(), ) }; assert!(!ptr.is_null()); @@ -35,40 +44,40 @@ fn test_mmap() { // Test all of our error conditions let ptr = unsafe { - libc::mmap( + mmap( ptr::null_mut(), page_size, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_PRIVATE | libc::MAP_SHARED, // Can't be both private and shared -1, - 0, + Default::default(), ) }; assert_eq!(ptr, libc::MAP_FAILED); assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL); let ptr = unsafe { - libc::mmap( + mmap( ptr::null_mut(), 0, // Can't map no memory libc::PROT_READ | libc::PROT_WRITE, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, - 0, + Default::default(), ) }; assert_eq!(ptr, libc::MAP_FAILED); assert_eq!(Error::last_os_error().raw_os_error().unwrap(), libc::EINVAL); let ptr = unsafe { - libc::mmap( + mmap( ptr::invalid_mut(page_size * 64), page_size, libc::PROT_READ | libc::PROT_WRITE, // We don't support MAP_FIXED libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED, -1, - 0, + Default::default(), ) }; assert_eq!(ptr, libc::MAP_FAILED); @@ -77,13 +86,13 @@ fn test_mmap() { // We don't support protections other than read+write for prot in [libc::PROT_NONE, libc::PROT_EXEC, libc::PROT_READ, libc::PROT_WRITE] { let ptr = unsafe { - libc::mmap( + mmap( ptr::null_mut(), page_size, prot, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, - 0, + Default::default(), ) }; assert_eq!(ptr, libc::MAP_FAILED); @@ -93,13 +102,13 @@ fn test_mmap() { // We report an error for mappings whose length cannot be rounded up to a multiple of // the page size. let ptr = unsafe { - libc::mmap( + mmap( ptr::null_mut(), usize::MAX - 1, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, - 0, + Default::default(), ) }; assert_eq!(ptr, libc::MAP_FAILED); @@ -163,7 +172,9 @@ fn test_mremap() { } fn main() { - test_mmap(); + test_mmap(libc::mmap); + #[cfg(target_os = "linux")] + test_mmap(libc::mmap64); #[cfg(target_os = "linux")] test_mremap(); } diff --git a/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs b/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs index 4cc5b7d68a3..077bbfff164 100644 --- a/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs +++ b/src/tools/miri/tests/pass-dep/shims/pthread-sync.rs @@ -1,24 +1,34 @@ //@ignore-target-windows: No libc on Windows +// We use `yield` to test specific interleavings, so disable automatic preemption. +//@compile-flags: -Zmiri-preemption-rate=0 +#![feature(sync_unsafe_cell)] + +use std::cell::SyncUnsafeCell; +use std::thread; +use std::{mem, ptr}; fn main() { test_mutex_libc_init_recursive(); test_mutex_libc_init_normal(); test_mutex_libc_init_errorcheck(); test_rwlock_libc_static_initializer(); - #[cfg(target_os = "linux")] test_mutex_libc_static_initializer_recursive(); + + test_mutex(); + check_rwlock_write(); + check_rwlock_read_no_deadlock(); } fn test_mutex_libc_init_recursive() { unsafe { - let mut attr: libc::pthread_mutexattr_t = std::mem::zeroed(); + let mut attr: libc::pthread_mutexattr_t = mem::zeroed(); assert_eq!(libc::pthread_mutexattr_init(&mut attr as *mut _), 0); assert_eq!( libc::pthread_mutexattr_settype(&mut attr as *mut _, libc::PTHREAD_MUTEX_RECURSIVE), 0, ); - let mut mutex: libc::pthread_mutex_t = std::mem::zeroed(); + let mut mutex: libc::pthread_mutex_t = mem::zeroed(); assert_eq!(libc::pthread_mutex_init(&mut mutex as *mut _, &mut attr as *mut _), 0); assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0); assert_eq!(libc::pthread_mutex_trylock(&mut mutex as *mut _), 0); @@ -36,7 +46,7 @@ fn test_mutex_libc_init_recursive() { fn test_mutex_libc_init_normal() { unsafe { - let mut mutexattr: libc::pthread_mutexattr_t = std::mem::zeroed(); + let mut mutexattr: libc::pthread_mutexattr_t = mem::zeroed(); assert_eq!( libc::pthread_mutexattr_settype(&mut mutexattr as *mut _, 0x12345678), libc::EINVAL, @@ -45,7 +55,7 @@ fn test_mutex_libc_init_normal() { libc::pthread_mutexattr_settype(&mut mutexattr as *mut _, libc::PTHREAD_MUTEX_NORMAL), 0, ); - let mut mutex: libc::pthread_mutex_t = std::mem::zeroed(); + let mut mutex: libc::pthread_mutex_t = mem::zeroed(); assert_eq!(libc::pthread_mutex_init(&mut mutex as *mut _, &mutexattr as *const _), 0); assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0); assert_eq!(libc::pthread_mutex_trylock(&mut mutex as *mut _), libc::EBUSY); @@ -58,7 +68,7 @@ fn test_mutex_libc_init_normal() { fn test_mutex_libc_init_errorcheck() { unsafe { - let mut mutexattr: libc::pthread_mutexattr_t = std::mem::zeroed(); + let mut mutexattr: libc::pthread_mutexattr_t = mem::zeroed(); assert_eq!( libc::pthread_mutexattr_settype( &mut mutexattr as *mut _, @@ -66,7 +76,7 @@ fn test_mutex_libc_init_errorcheck() { ), 0, ); - let mut mutex: libc::pthread_mutex_t = std::mem::zeroed(); + let mut mutex: libc::pthread_mutex_t = mem::zeroed(); assert_eq!(libc::pthread_mutex_init(&mut mutex as *mut _, &mutexattr as *const _), 0); assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0); assert_eq!(libc::pthread_mutex_trylock(&mut mutex as *mut _), libc::EBUSY); @@ -98,9 +108,113 @@ fn test_mutex_libc_static_initializer_recursive() { } } -// Testing the behavior of std::sync::RwLock does not fully exercise the pthread rwlock shims, we -// need to go a layer deeper and test the behavior of the libc functions, because -// std::sys::unix::rwlock::RWLock itself keeps track of write_locked and num_readers. +struct SendPtr { + ptr: *mut T, +} +unsafe impl Send for SendPtr {} +impl Copy for SendPtr {} +impl Clone for SendPtr { + fn clone(&self) -> Self { + *self + } +} + +fn test_mutex() { + // Specifically *not* using `Arc` to make sure there is no synchronization apart from the mutex. + unsafe { + let data = SyncUnsafeCell::new((libc::PTHREAD_MUTEX_INITIALIZER, 0)); + let ptr = SendPtr { ptr: data.get() }; + let mut threads = Vec::new(); + + for _ in 0..3 { + let thread = thread::spawn(move || { + let ptr = ptr; // circumvent per-field closure capture + let mutexptr = ptr::addr_of_mut!((*ptr.ptr).0); + assert_eq!(libc::pthread_mutex_lock(mutexptr), 0); + thread::yield_now(); + (*ptr.ptr).1 += 1; + assert_eq!(libc::pthread_mutex_unlock(mutexptr), 0); + }); + threads.push(thread); + } + + for thread in threads { + thread.join().unwrap(); + } + + let mutexptr = ptr::addr_of_mut!((*ptr.ptr).0); + assert_eq!(libc::pthread_mutex_trylock(mutexptr), 0); + assert_eq!((*ptr.ptr).1, 3); + } +} + +fn check_rwlock_write() { + unsafe { + let data = SyncUnsafeCell::new((libc::PTHREAD_RWLOCK_INITIALIZER, 0)); + let ptr = SendPtr { ptr: data.get() }; + let mut threads = Vec::new(); + + for _ in 0..3 { + let thread = thread::spawn(move || { + let ptr = ptr; // circumvent per-field closure capture + let rwlockptr = ptr::addr_of_mut!((*ptr.ptr).0); + assert_eq!(libc::pthread_rwlock_wrlock(rwlockptr), 0); + thread::yield_now(); + (*ptr.ptr).1 += 1; + assert_eq!(libc::pthread_rwlock_unlock(rwlockptr), 0); + }); + threads.push(thread); + + let readthread = thread::spawn(move || { + let ptr = ptr; // circumvent per-field closure capture + let rwlockptr = ptr::addr_of_mut!((*ptr.ptr).0); + assert_eq!(libc::pthread_rwlock_rdlock(rwlockptr), 0); + thread::yield_now(); + let val = (*ptr.ptr).1; + assert!(val >= 0 && val <= 3); + assert_eq!(libc::pthread_rwlock_unlock(rwlockptr), 0); + }); + threads.push(readthread); + } + + for thread in threads { + thread.join().unwrap(); + } + + let rwlockptr = ptr::addr_of_mut!((*ptr.ptr).0); + assert_eq!(libc::pthread_rwlock_tryrdlock(rwlockptr), 0); + assert_eq!((*ptr.ptr).1, 3); + } +} + +fn check_rwlock_read_no_deadlock() { + unsafe { + let l1 = SyncUnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER); + let l1 = SendPtr { ptr: l1.get() }; + let l2 = SyncUnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER); + let l2 = SendPtr { ptr: l2.get() }; + + // acquire l1 and hold it until after the other thread is done + assert_eq!(libc::pthread_rwlock_rdlock(l1.ptr), 0); + let handle = thread::spawn(move || { + let l1 = l1; // circumvent per-field closure capture + let l2 = l2; // circumvent per-field closure capture + // acquire l2 before the other thread + assert_eq!(libc::pthread_rwlock_rdlock(l2.ptr), 0); + thread::yield_now(); + assert_eq!(libc::pthread_rwlock_rdlock(l1.ptr), 0); + thread::yield_now(); + assert_eq!(libc::pthread_rwlock_unlock(l1.ptr), 0); + assert_eq!(libc::pthread_rwlock_unlock(l2.ptr), 0); + }); + thread::yield_now(); + assert_eq!(libc::pthread_rwlock_rdlock(l2.ptr), 0); + handle.join().unwrap(); + } +} + +// std::sync::RwLock does not even used pthread_rwlock any more. +// Do some smoke testing of the API surface. fn test_rwlock_libc_static_initializer() { let rw = std::cell::UnsafeCell::new(libc::PTHREAD_RWLOCK_INITIALIZER); unsafe { diff --git a/src/tools/miri/tests/pass/align_offset_symbolic.rs b/src/tools/miri/tests/pass/align_offset_symbolic.rs index e96f11b1efa..ac28c63e081 100644 --- a/src/tools/miri/tests/pass/align_offset_symbolic.rs +++ b/src/tools/miri/tests/pass/align_offset_symbolic.rs @@ -113,7 +113,7 @@ fn vtable() { let ptr: &dyn Send = &0; let parts: (*const (), *const u8) = unsafe { mem::transmute(ptr) }; - let vtable = parts.1 ; + let vtable = parts.1; let offset = vtable.align_offset(mem::align_of::()); let _vtable_aligned = vtable.wrapping_add(offset) as *const [TWOPTR; 0]; // FIXME: we can't actually do the access since vtable pointers act like zero-sized allocations. diff --git a/src/tools/miri/tests/pass/concurrency/sync.rs b/src/tools/miri/tests/pass/concurrency/sync.rs index e93e617fd26..1d48e5312d4 100644 --- a/src/tools/miri/tests/pass/concurrency/sync.rs +++ b/src/tools/miri/tests/pass/concurrency/sync.rs @@ -1,6 +1,7 @@ //@revisions: stack tree //@[tree]compile-flags: -Zmiri-tree-borrows -//@compile-flags: -Zmiri-disable-isolation -Zmiri-strict-provenance +// We use `yield` to test specific interleavings, so disable automatic preemption. +//@compile-flags: -Zmiri-disable-isolation -Zmiri-strict-provenance -Zmiri-preemption-rate=0 use std::sync::{Arc, Barrier, Condvar, Mutex, Once, RwLock}; use std::thread; @@ -119,13 +120,25 @@ fn check_rwlock_write() { let mut threads = Vec::new(); for _ in 0..3 { - let data = Arc::clone(&data); - let thread = thread::spawn(move || { - let mut data = data.write().unwrap(); - thread::yield_now(); - *data += 1; + let thread = thread::spawn({ + let data = Arc::clone(&data); + move || { + let mut data = data.write().unwrap(); + thread::yield_now(); + *data += 1; + } }); threads.push(thread); + + let readthread = thread::spawn({ + let data = Arc::clone(&data); + move || { + let data = data.read().unwrap(); + thread::yield_now(); + assert!(*data >= 0 && *data <= 3); + } + }); + threads.push(readthread); } for thread in threads { @@ -144,8 +157,10 @@ fn check_rwlock_read_no_deadlock() { let l1_copy = Arc::clone(&l1); let l2_copy = Arc::clone(&l2); + // acquire l1 and hold it until after the other thread is done let _guard1 = l1.read().unwrap(); let handle = thread::spawn(move || { + // acquire l2 before the other thread let _guard2 = l2_copy.read().unwrap(); thread::yield_now(); let _guard1 = l1_copy.read().unwrap(); diff --git a/src/tools/miri/tests/pass/imported_main.rs b/src/tools/miri/tests/pass/imported_main.rs new file mode 100644 index 00000000000..32b39152f78 --- /dev/null +++ b/src/tools/miri/tests/pass/imported_main.rs @@ -0,0 +1,8 @@ +#![feature(imported_main)] + +pub mod foo { + pub fn mymain() { + println!("Hello, world!"); + } +} +use foo::mymain as main; diff --git a/src/tools/miri/tests/pass/imported_main.stdout b/src/tools/miri/tests/pass/imported_main.stdout new file mode 100644 index 00000000000..af5626b4a11 --- /dev/null +++ b/src/tools/miri/tests/pass/imported_main.stdout @@ -0,0 +1 @@ +Hello, world! diff --git a/src/tools/miri/tests/pass/intrinsics-x86-avx.rs b/src/tools/miri/tests/pass/intrinsics-x86-avx.rs index 933e3d4153a..7d43cc596ae 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-avx.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-avx.rs @@ -25,6 +25,528 @@ fn main() { #[target_feature(enable = "avx")] unsafe fn test_avx() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx.rs + + macro_rules! assert_approx_eq { + ($a:expr, $b:expr, $eps:expr) => {{ + let (a, b) = (&$a, &$b); + assert!( + (*a - *b).abs() < $eps, + "assertion failed: `(left !== right)` \ + (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)", + *a, + *b, + $eps, + (*a - *b).abs() + ); + }}; + } + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_max_pd() { + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_max_pd(a, b); + let e = _mm256_setr_pd(2., 4., 6., 8.); + assert_eq_m256d(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0)); + let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0)); + let wu: [u64; 4] = transmute(w); + let xu: [u64; 4] = transmute(x); + assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]); + assert_eq!(xu, [0u64; 4]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. + let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0)); + let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN)); + let yf: [f64; 4] = transmute(y); + let zf: [f64; 4] = transmute(z); + assert_eq!(yf, [0.0; 4]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + test_mm256_max_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_max_ps() { + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_max_ps(a, b); + let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.); + assert_eq_m256(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0)); + let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0)); + let wu: [u32; 8] = transmute(w); + let xu: [u32; 8] = transmute(x); + assert_eq!(wu, [0x8000_0000u32; 8]); + assert_eq!(xu, [0u32; 8]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. + let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0)); + let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN)); + let yf: [f32; 8] = transmute(y); + let zf: [f32; 8] = transmute(z); + assert_eq!(yf, [0.0; 8]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + test_mm256_max_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_min_pd() { + let a = _mm256_setr_pd(1., 4., 5., 8.); + let b = _mm256_setr_pd(2., 3., 6., 7.); + let r = _mm256_min_pd(a, b); + let e = _mm256_setr_pd(1., 3., 5., 7.); + assert_eq_m256d(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0)); + let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0)); + let wu: [u64; 4] = transmute(w); + let xu: [u64; 4] = transmute(x); + assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]); + assert_eq!(xu, [0u64; 4]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. + let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0)); + let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN)); + let yf: [f64; 4] = transmute(y); + let zf: [f64; 4] = transmute(z); + assert_eq!(yf, [0.0; 4]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + test_mm256_min_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_min_ps() { + let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm256_min_ps(a, b); + let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.); + assert_eq_m256(r, e); + // > If the values being compared are both 0.0s (of either sign), the + // > value in the second operand (source operand) is returned. + let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0)); + let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0)); + let wu: [u32; 8] = transmute(w); + let xu: [u32; 8] = transmute(x); + assert_eq!(wu, [0x8000_0000u32; 8]); + assert_eq!(xu, [0u32; 8]); + // > If only one value is a NaN (SNaN or QNaN) for this instruction, the + // > second operand (source operand), either a NaN or a valid + // > floating-point value, is written to the result. + let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0)); + let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN)); + let yf: [f32; 8] = transmute(y); + let zf: [f32; 8] = transmute(z); + assert_eq!(yf, [0.0; 8]); + assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf); + } + test_mm256_min_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_nearest_f32() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f32, res: f32) { + let a = _mm256_set1_ps(x); + let e = _mm256_set1_ps(res); + let r = _mm256_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a); + assert_eq_m256(r, e); + // Assume round-to-nearest by default + let r = _mm256_round_ps::<_MM_FROUND_CUR_DIRECTION>(a); + assert_eq_m256(r, e); + } + + // Test rounding direction + test(-2.5, -2.0); + test(-1.75, -2.0); + test(-1.5, -2.0); + test(-1.25, -1.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 1.0); + test(1.5, 2.0); + test(1.75, 2.0); + test(2.5, 2.0); + + // Test that each element is rounded + let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5); + let e = _mm256_setr_ps(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0); + let r = _mm256_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a); + assert_eq_m256(r, e); + // Assume round-to-nearest by default + let r = _mm256_round_ps::<_MM_FROUND_CUR_DIRECTION>(a); + assert_eq_m256(r, e); + } + test_round_nearest_f32(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_floor_f32() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f32, res: f32) { + let a = _mm256_set1_ps(x); + let e = _mm256_set1_ps(res); + let r = _mm256_floor_ps(a); + assert_eq_m256(r, e); + let r = _mm256_round_ps::<_MM_FROUND_TO_NEG_INF>(a); + assert_eq_m256(r, e); + } + + // Test rounding direction + test(-2.5, -3.0); + test(-1.75, -2.0); + test(-1.5, -2.0); + test(-1.25, -2.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 1.0); + test(1.5, 1.0); + test(1.75, 1.0); + test(2.5, 2.0); + + // Test that each element is rounded + let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5); + let e = _mm256_setr_ps(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); + let r = _mm256_floor_ps(a); + assert_eq_m256(r, e); + let r = _mm256_round_ps::<_MM_FROUND_TO_NEG_INF>(a); + assert_eq_m256(r, e); + } + test_round_floor_f32(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_ceil_f32() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f32, res: f32) { + let a = _mm256_set1_ps(x); + let e = _mm256_set1_ps(res); + let r = _mm256_ceil_ps(a); + assert_eq_m256(r, e); + let r = _mm256_round_ps::<_MM_FROUND_TO_POS_INF>(a); + assert_eq_m256(r, e); + } + + // Test rounding direction + test(-2.5, -2.0); + test(-1.75, -1.0); + test(-1.5, -1.0); + test(-1.25, -1.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 2.0); + test(1.5, 2.0); + test(1.75, 2.0); + test(2.5, 3.0); + + // Test that each element is rounded + let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5); + let e = _mm256_setr_ps(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0); + let r = _mm256_ceil_ps(a); + assert_eq_m256(r, e); + let r = _mm256_round_ps::<_MM_FROUND_TO_POS_INF>(a); + assert_eq_m256(r, e); + } + test_round_ceil_f32(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_trunc_f32() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f32, res: f32) { + let a = _mm256_set1_ps(x); + let e = _mm256_set1_ps(res); + let r = _mm256_round_ps::<_MM_FROUND_TO_ZERO>(a); + assert_eq_m256(r, e); + } + + // Test rounding direction + test(-2.5, -2.0); + test(-1.75, -1.0); + test(-1.5, -1.0); + test(-1.25, -1.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 1.0); + test(1.5, 1.0); + test(1.75, 1.0); + test(2.5, 2.0); + + // Test that each element is rounded + let a = _mm256_setr_ps(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5); + let e = _mm256_setr_ps(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); + let r = _mm256_round_ps::<_MM_FROUND_TO_ZERO>(a); + assert_eq_m256(r, e); + } + test_round_trunc_f32(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_nearest_f64() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f64, res: f64) { + let a = _mm256_set1_pd(x); + let e = _mm256_set1_pd(res); + let r = _mm256_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); + assert_eq_m256d(r, e); + // Assume round-to-nearest by default + let r = _mm256_round_pd::<_MM_FROUND_CUR_DIRECTION>(a); + assert_eq_m256d(r, e); + } + + // Test rounding direction + test(-2.5, -2.0); + test(-1.75, -2.0); + test(-1.5, -2.0); + test(-1.25, -1.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 1.0); + test(1.5, 2.0); + test(1.75, 2.0); + test(2.5, 2.0); + + // Test that each element is rounded + let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5); + let e = _mm256_setr_pd(2.0, 4.0, 6.0, 8.0); + let r = _mm256_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); + assert_eq_m256d(r, e); + // Assume round-to-nearest by default + let r = _mm256_round_pd::<_MM_FROUND_CUR_DIRECTION>(a); + assert_eq_m256d(r, e); + } + test_round_nearest_f64(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_floor_f64() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f64, res: f64) { + let a = _mm256_set1_pd(x); + let e = _mm256_set1_pd(res); + let r = _mm256_floor_pd(a); + assert_eq_m256d(r, e); + let r = _mm256_round_pd::<_MM_FROUND_TO_NEG_INF>(a); + assert_eq_m256d(r, e); + } + + // Test rounding direction + test(-2.5, -3.0); + test(-1.75, -2.0); + test(-1.5, -2.0); + test(-1.25, -2.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 1.0); + test(1.5, 1.0); + test(1.75, 1.0); + test(2.5, 2.0); + + // Test that each element is rounded + let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5); + let e = _mm256_setr_pd(1.0, 3.0, 5.0, 7.0); + let r = _mm256_floor_pd(a); + assert_eq_m256d(r, e); + let r = _mm256_round_pd::<_MM_FROUND_TO_NEG_INF>(a); + assert_eq_m256d(r, e); + } + test_round_floor_f64(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_ceil_f64() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f64, res: f64) { + let a = _mm256_set1_pd(x); + let e = _mm256_set1_pd(res); + let r = _mm256_ceil_pd(a); + assert_eq_m256d(r, e); + let r = _mm256_round_pd::<_MM_FROUND_TO_POS_INF>(a); + assert_eq_m256d(r, e); + } + + // Test rounding direction + test(-2.5, -2.0); + test(-1.75, -1.0); + test(-1.5, -1.0); + test(-1.25, -1.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 2.0); + test(1.5, 2.0); + test(1.75, 2.0); + test(2.5, 3.0); + + // Test that each element is rounded + let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5); + let e = _mm256_setr_pd(2.0, 4.0, 6.0, 8.0); + let r = _mm256_ceil_pd(a); + assert_eq_m256d(r, e); + let r = _mm256_round_pd::<_MM_FROUND_TO_POS_INF>(a); + assert_eq_m256d(r, e); + } + test_round_ceil_f64(); + + #[target_feature(enable = "avx")] + unsafe fn test_round_trunc_f64() { + #[target_feature(enable = "avx")] + unsafe fn test(x: f64, res: f64) { + let a = _mm256_set1_pd(x); + let e = _mm256_set1_pd(res); + let r = _mm256_round_pd::<_MM_FROUND_TO_ZERO>(a); + assert_eq_m256d(r, e); + } + + // Test rounding direction + test(-2.5, -2.0); + test(-1.75, -1.0); + test(-1.5, -1.0); + test(-1.25, -1.0); + test(-1.0, -1.0); + test(0.0, 0.0); + test(1.0, 1.0); + test(1.25, 1.0); + test(1.5, 1.0); + test(1.75, 1.0); + test(2.5, 2.0); + + // Test that each element is rounded + let a = _mm256_setr_pd(1.5, 3.5, 5.5, 7.5); + let e = _mm256_setr_pd(1.0, 3.0, 5.0, 7.0); + let r = _mm256_round_pd::<_MM_FROUND_TO_ZERO>(a); + assert_eq_m256d(r, e); + } + test_round_trunc_f64(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_sqrt_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_sqrt_ps(a); + let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.); + assert_eq_m256(r, e); + } + test_mm256_sqrt_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_rcp_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_rcp_ps(a); + #[rustfmt::skip] + let e = _mm256_setr_ps( + 0.99975586, 0.49987793, 0.33325195, 0.24993896, + 0.19995117, 0.16662598, 0.14282227, 0.12496948, + ); + let rel_err = 0.00048828125; + + let r: [f32; 8] = transmute(r); + let e: [f32; 8] = transmute(e); + for i in 0..8 { + assert_approx_eq!(r[i], e[i], 2. * rel_err); + } + } + test_mm256_rcp_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_rsqrt_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_rsqrt_ps(a); + #[rustfmt::skip] + let e = _mm256_setr_ps( + 0.99975586, 0.7069092, 0.5772705, 0.49987793, + 0.44714355, 0.40820313, 0.3779297, 0.3534546, + ); + let rel_err = 0.00048828125; + + let r: [f32; 8] = transmute(r); + let e: [f32; 8] = transmute(e); + for i in 0..8 { + assert_approx_eq!(r[i], e[i], 2. * rel_err); + } + } + test_mm256_rsqrt_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_dp_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_dp_ps::<0xFF>(a, b); + let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.); + assert_eq_m256(r, e); + } + test_mm256_dp_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_hadd_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_hadd_pd(a, b); + let e = _mm256_setr_pd(13., 7., 41., 7.); + assert_eq_m256d(r, e); + + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_hadd_pd(a, b); + let e = _mm256_setr_pd(3., 11., 7., 15.); + assert_eq_m256d(r, e); + } + test_mm256_hadd_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_hadd_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_hadd_ps(a, b); + let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.); + assert_eq_m256(r, e); + + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_hadd_ps(a, b); + let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.); + assert_eq_m256(r, e); + } + test_mm256_hadd_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_hsub_pd() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let b = _mm256_setr_pd(4., 3., 2., 5.); + let r = _mm256_hsub_pd(a, b); + let e = _mm256_setr_pd(-5., 1., -9., -3.); + assert_eq_m256d(r, e); + + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_hsub_pd(a, b); + let e = _mm256_setr_pd(-1., -1., -1., -1.); + assert_eq_m256d(r, e); + } + test_mm256_hsub_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_hsub_ps() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let r = _mm256_hsub_ps(a, b); + let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.); + assert_eq_m256(r, e); + + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_hsub_ps(a, b); + let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.); + assert_eq_m256(r, e); + } + test_mm256_hsub_ps(); + fn expected_cmp(imm: i32, lhs: F, rhs: F, if_t: F, if_f: F) -> F { let res = match imm { _CMP_EQ_OQ => lhs == rhs, @@ -135,12 +657,54 @@ unsafe fn test_avx() { } } + #[target_feature(enable = "avx")] + unsafe fn test_mm256_cmp_ps() { + let values = [ + (1.0, 1.0), + (0.0, 1.0), + (1.0, 0.0), + (f32::NAN, 0.0), + (0.0, f32::NAN), + (f32::NAN, f32::NAN), + ]; + + for (lhs, rhs) in values { + let a = _mm256_set1_ps(lhs); + let b = _mm256_set1_ps(rhs); + let r: [u32; 8] = transmute(_mm256_cmp_ps::(a, b)); + let e: [u32; 8] = transmute(_mm256_set1_ps(expected_cmp_f32(IMM, lhs, rhs))); + assert_eq!(r, e); + } + } + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_cmp_pd() { + let values = [ + (1.0, 1.0), + (0.0, 1.0), + (1.0, 0.0), + (f64::NAN, 0.0), + (0.0, f64::NAN), + (f64::NAN, f64::NAN), + ]; + + for (lhs, rhs) in values { + let a = _mm256_set1_pd(lhs); + let b = _mm256_set1_pd(rhs); + let r: [u64; 4] = transmute(_mm256_cmp_pd::(a, b)); + let e: [u64; 4] = transmute(_mm256_set1_pd(expected_cmp_f64(IMM, lhs, rhs))); + assert_eq!(r, e); + } + } + #[target_feature(enable = "avx")] unsafe fn test_cmp() { test_mm_cmp_ss::(); test_mm_cmp_ps::(); test_mm_cmp_sd::(); test_mm_cmp_pd::(); + test_mm256_cmp_ps::(); + test_mm256_cmp_pd::(); } test_cmp::<_CMP_EQ_OQ>(); @@ -159,4 +723,709 @@ unsafe fn test_avx() { test_cmp::<_CMP_GE_OS>(); test_cmp::<_CMP_GT_OS>(); test_cmp::<_CMP_TRUE_US>(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_cvtps_epi32() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_cvtps_epi32(a); + let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); + assert_eq_m256i(r, e); + + let a = _mm256_setr_ps( + f32::NEG_INFINITY, + f32::INFINITY, + f32::MIN, + f32::MAX, + f32::NAN, + f32::NAN, + f32::NAN, + f32::NAN, + ); + let r = _mm256_cvtps_epi32(a); + assert_eq_m256i(r, _mm256_set1_epi32(i32::MIN)); + } + test_mm256_cvtps_epi32(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_cvttps_epi32() { + let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); + let r = _mm256_cvttps_epi32(a); + let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); + assert_eq_m256i(r, e); + + let a = _mm256_setr_ps( + f32::NEG_INFINITY, + f32::INFINITY, + f32::MIN, + f32::MAX, + f32::NAN, + f32::NAN, + f32::NAN, + f32::NAN, + ); + let r = _mm256_cvttps_epi32(a); + assert_eq_m256i(r, _mm256_set1_epi32(i32::MIN)); + } + test_mm256_cvttps_epi32(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_cvtpd_epi32() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvtpd_epi32(a); + let e = _mm_setr_epi32(4, 9, 16, 25); + assert_eq_m128i(r, e); + + let a = _mm256_setr_pd(f64::NEG_INFINITY, f64::INFINITY, f64::MIN, f64::MAX); + let r = _mm256_cvtpd_epi32(a); + assert_eq_m128i(r, _mm_set1_epi32(i32::MIN)); + } + test_mm256_cvtpd_epi32(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_cvttpd_epi32() { + let a = _mm256_setr_pd(4., 9., 16., 25.); + let r = _mm256_cvttpd_epi32(a); + let e = _mm_setr_epi32(4, 9, 16, 25); + assert_eq_m128i(r, e); + + let a = _mm256_setr_pd(f64::NEG_INFINITY, f64::INFINITY, f64::MIN, f64::MAX); + let r = _mm256_cvttpd_epi32(a); + assert_eq_m128i(r, _mm_set1_epi32(i32::MIN)); + } + test_mm256_cvttpd_epi32(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_permutevar_ps() { + let a = _mm_setr_ps(4., 3., 2., 5.); + let b = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm_permutevar_ps(a, b); + let e = _mm_setr_ps(3., 2., 5., 4.); + assert_eq_m128(r, e); + } + test_mm_permutevar_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_permutevar_ps() { + let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); + let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_permutevar_ps(a, b); + let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.); + assert_eq_m256(r, e); + } + test_mm256_permutevar_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_permutevar_pd() { + let a = _mm_setr_pd(4., 3.); + let b = _mm_setr_epi64x(3, 0); + let r = _mm_permutevar_pd(a, b); + let e = _mm_setr_pd(3., 4.); + assert_eq_m128d(r, e); + } + test_mm_permutevar_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_permutevar_pd() { + let a = _mm256_setr_pd(4., 3., 2., 5.); + let b = _mm256_setr_epi64x(1, 2, 3, 4); + let r = _mm256_permutevar_pd(a, b); + let e = _mm256_setr_pd(4., 3., 5., 2.); + assert_eq_m256d(r, e); + } + test_mm256_permutevar_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_permute2f128_ps() { + let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); + let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); + let r = _mm256_permute2f128_ps::<0x13>(a, b); + let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.); + assert_eq_m256(r, e); + + let r = _mm256_permute2f128_ps::<0x44>(a, b); + let e = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + assert_eq_m256(r, e); + } + test_mm256_permute2f128_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_permute2f128_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_permute2f128_pd::<0x31>(a, b); + let e = _mm256_setr_pd(3., 4., 7., 8.); + assert_eq_m256d(r, e); + + let r = _mm256_permute2f128_pd::<0x44>(a, b); + let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0); + assert_eq_m256d(r, e); + } + test_mm256_permute2f128_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_permute2f128_si256() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4); + let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8); + let r = _mm256_permute2f128_si256::<0x20>(a, b); + let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m256i(r, e); + + let r = _mm256_permute2f128_si256::<0x44>(a, b); + let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + test_mm256_permute2f128_si256(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_maskload_ps() { + let a = &[1.0f32, 2., 3., 4.]; + let mask = _mm_setr_epi32(0, !0, 0, !0); + let r = _mm_maskload_ps(a.as_ptr(), mask); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); + + // Unaligned pointer + let a = Unaligned::new([1.0f32, 2., 3., 4.]); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let r = _mm_maskload_ps(a.as_ptr().cast(), mask); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); + + // Only loading first element, so slice can be short. + let a = &[2.0f32]; + let mask = _mm_setr_epi32(!0, 0, 0, 0); + let r = _mm_maskload_ps(a.as_ptr(), mask); + let e = _mm_setr_ps(2.0, 0.0, 0.0, 0.0); + assert_eq_m128(r, e); + + // Only loading last element, so slice can be short. + let a = &[2.0f32]; + let mask = _mm_setr_epi32(0, 0, 0, !0); + let r = _mm_maskload_ps(a.as_ptr().wrapping_sub(3), mask); + let e = _mm_setr_ps(0.0, 0.0, 0.0, 2.0); + assert_eq_m128(r, e); + } + test_mm_maskload_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_maskload_pd() { + let a = &[1.0f64, 2.]; + let mask = _mm_setr_epi64x(0, !0); + let r = _mm_maskload_pd(a.as_ptr(), mask); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); + + // Unaligned pointer + let a = Unaligned::new([1.0f64, 2.]); + let mask = _mm_setr_epi64x(0, !0); + let r = _mm_maskload_pd(a.as_ptr().cast(), mask); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); + + // Only loading first element, so slice can be short. + let a = &[2.0f64]; + let mask = _mm_setr_epi64x(!0, 0); + let r = _mm_maskload_pd(a.as_ptr(), mask); + let e = _mm_setr_pd(2.0, 0.0); + assert_eq_m128d(r, e); + + // Only loading last element, so slice can be short. + let a = &[2.0f64]; + let mask = _mm_setr_epi64x(0, !0); + let r = _mm_maskload_pd(a.as_ptr().wrapping_sub(1), mask); + let e = _mm_setr_pd(0.0, 2.0); + assert_eq_m128d(r, e); + } + test_mm_maskload_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_maskload_ps() { + let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.]; + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let r = _mm256_maskload_ps(a.as_ptr(), mask); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); + + // Unaligned pointer + let a = Unaligned::new([1.0f32, 2., 3., 4., 5., 6., 7., 8.]); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let r = _mm256_maskload_ps(a.as_ptr().cast(), mask); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); + + // Only loading first element, so slice can be short. + let a = &[2.0f32]; + let mask = _mm256_setr_epi32(!0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm256_maskload_ps(a.as_ptr(), mask); + let e = _mm256_setr_ps(2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + assert_eq_m256(r, e); + + // Only loading last element, so slice can be short. + let a = &[2.0f32]; + let mask = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, !0); + let r = _mm256_maskload_ps(a.as_ptr().wrapping_sub(7), mask); + let e = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); + assert_eq_m256(r, e); + } + test_mm256_maskload_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_maskload_pd() { + let a = &[1.0f64, 2., 3., 4.]; + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let r = _mm256_maskload_pd(a.as_ptr(), mask); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); + + // Unaligned pointer + let a = Unaligned::new([1.0f64, 2., 3., 4.]); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let r = _mm256_maskload_pd(a.as_ptr().cast(), mask); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); + + // Only loading first element, so slice can be short. + let a = &[2.0f64]; + let mask = _mm256_setr_epi64x(!0, 0, 0, 0); + let r = _mm256_maskload_pd(a.as_ptr(), mask); + let e = _mm256_setr_pd(2.0, 0.0, 0.0, 0.0); + assert_eq_m256d(r, e); + + // Only loading last element, so slice can be short. + let a = &[2.0f64]; + let mask = _mm256_setr_epi64x(0, 0, 0, !0); + let r = _mm256_maskload_pd(a.as_ptr().wrapping_sub(3), mask); + let e = _mm256_setr_pd(0.0, 0.0, 0.0, 2.0); + assert_eq_m256d(r, e); + } + test_mm256_maskload_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_maskstore_ps() { + let mut r = _mm_set1_ps(0.); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let a = _mm_setr_ps(1., 2., 3., 4.); + _mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); + let e = _mm_setr_ps(0., 2., 0., 4.); + assert_eq_m128(r, e); + + // Unaligned pointer + let mut r = Unaligned::new([0.0f32; 4]); + let mask = _mm_setr_epi32(0, !0, 0, !0); + let a = _mm_setr_ps(1., 2., 3., 4.); + _mm_maskstore_ps(r.as_mut_ptr().cast(), mask, a); + let e = [0., 2., 0., 4.]; + assert_eq!(r.read(), e); + + // Only storing first element, so slice can be short. + let mut r = [0.0f32]; + let mask = _mm_setr_epi32(!0, 0, 0, 0); + let a = _mm_setr_ps(1., 2., 3., 4.); + _mm_maskstore_ps(r.as_mut_ptr(), mask, a); + let e = [1.0f32]; + assert_eq!(r, e); + + // Only storing last element, so slice can be short. + let mut r = [0.0f32]; + let mask = _mm_setr_epi32(0, 0, 0, !0); + let a = _mm_setr_ps(1., 2., 3., 4.); + _mm_maskstore_ps(r.as_mut_ptr().wrapping_sub(3), mask, a); + let e = [4.0f32]; + assert_eq!(r, e); + } + test_mm_maskstore_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_maskstore_pd() { + let mut r = _mm_set1_pd(0.); + let mask = _mm_setr_epi64x(0, !0); + let a = _mm_setr_pd(1., 2.); + _mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); + let e = _mm_setr_pd(0., 2.); + assert_eq_m128d(r, e); + + // Unaligned pointer + let mut r = Unaligned::new([0.0f64; 2]); + let mask = _mm_setr_epi64x(0, !0); + let a = _mm_setr_pd(1., 2.); + _mm_maskstore_pd(r.as_mut_ptr().cast(), mask, a); + let e = [0., 2.]; + assert_eq!(r.read(), e); + + // Only storing first element, so slice can be short. + let mut r = [0.0f64]; + let mask = _mm_setr_epi64x(!0, 0); + let a = _mm_setr_pd(1., 2.); + _mm_maskstore_pd(r.as_mut_ptr(), mask, a); + let e = [1.0f64]; + assert_eq!(r, e); + + // Only storing last element, so slice can be short. + let mut r = [0.0f64]; + let mask = _mm_setr_epi64x(0, !0); + let a = _mm_setr_pd(1., 2.); + _mm_maskstore_pd(r.as_mut_ptr().wrapping_sub(1), mask, a); + let e = [2.0f64]; + assert_eq!(r, e); + } + test_mm_maskstore_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_maskstore_ps() { + let mut r = _mm256_set1_ps(0.); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + _mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); + let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); + assert_eq_m256(r, e); + + // Unaligned pointer + let mut r = Unaligned::new([0.0f32; 8]); + let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + _mm256_maskstore_ps(r.as_mut_ptr().cast(), mask, a); + let e = [0., 2., 0., 4., 0., 6., 0., 8.]; + assert_eq!(r.read(), e); + + // Only storing first element, so slice can be short. + let mut r = [0.0f32]; + let mask = _mm256_setr_epi32(!0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + _mm256_maskstore_ps(r.as_mut_ptr(), mask, a); + let e = [1.0f32]; + assert_eq!(r, e); + + // Only storing last element, so slice can be short. + let mut r = [0.0f32]; + let mask = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, !0); + let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + _mm256_maskstore_ps(r.as_mut_ptr().wrapping_sub(7), mask, a); + let e = [8.0f32]; + assert_eq!(r, e); + } + test_mm256_maskstore_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_maskstore_pd() { + let mut r = _mm256_set1_pd(0.); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let a = _mm256_setr_pd(1., 2., 3., 4.); + _mm256_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); + let e = _mm256_setr_pd(0., 2., 0., 4.); + assert_eq_m256d(r, e); + + // Unaligned pointer + let mut r = Unaligned::new([0.0f64; 4]); + let mask = _mm256_setr_epi64x(0, !0, 0, !0); + let a = _mm256_setr_pd(1., 2., 3., 4.); + _mm256_maskstore_pd(r.as_mut_ptr().cast(), mask, a); + let e = [0., 2., 0., 4.]; + assert_eq!(r.read(), e); + + // Only storing first element, so slice can be short. + let mut r = [0.0f64]; + let mask = _mm256_setr_epi64x(!0, 0, 0, 0); + let a = _mm256_setr_pd(1., 2., 3., 4.); + _mm256_maskstore_pd(r.as_mut_ptr(), mask, a); + let e = [1.0f64]; + assert_eq!(r, e); + + // Only storing last element, so slice can be short. + let mut r = [0.0f64]; + let mask = _mm256_setr_epi64x(0, 0, 0, !0); + let a = _mm256_setr_pd(1., 2., 3., 4.); + _mm256_maskstore_pd(r.as_mut_ptr().wrapping_sub(3), mask, a); + let e = [4.0f64]; + assert_eq!(r, e); + } + test_mm256_maskstore_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_lddqu_si256() { + #[rustfmt::skip] + let a = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + let p = &a as *const _; + let r = _mm256_lddqu_si256(p); + #[rustfmt::skip] + let e = _mm256_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ); + assert_eq_m256i(r, e); + } + test_mm256_lddqu_si256(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testz_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testz_si256(a, b); + assert_eq!(r, 0); + let b = _mm256_set1_epi64x(0); + let r = _mm256_testz_si256(a, b); + assert_eq!(r, 1); + } + test_mm256_testz_si256(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testc_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testc_si256(a, b); + assert_eq!(r, 0); + let b = _mm256_set1_epi64x(0); + let r = _mm256_testc_si256(a, b); + assert_eq!(r, 1); + } + test_mm256_testc_si256(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testnzc_si256() { + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm256_setr_epi64x(5, 6, 7, 8); + let r = _mm256_testnzc_si256(a, b); + assert_eq!(r, 1); + let a = _mm256_setr_epi64x(0, 0, 0, 0); + let b = _mm256_setr_epi64x(0, 0, 0, 0); + let r = _mm256_testnzc_si256(a, b); + assert_eq!(r, 0); + } + test_mm256_testnzc_si256(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testz_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testz_pd(a, b); + assert_eq!(r, 1); + let a = _mm256_set1_pd(-1.); + let r = _mm256_testz_pd(a, a); + assert_eq!(r, 0); + } + test_mm256_testz_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testc_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testc_pd(a, b); + assert_eq!(r, 1); + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(-1.); + let r = _mm256_testc_pd(a, b); + assert_eq!(r, 0); + } + test_mm256_testc_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testnzc_pd() { + let a = _mm256_setr_pd(1., 2., 3., 4.); + let b = _mm256_setr_pd(5., 6., 7., 8.); + let r = _mm256_testnzc_pd(a, b); + assert_eq!(r, 0); + let a = _mm256_setr_pd(1., -1., -1., -1.); + let b = _mm256_setr_pd(-1., -1., 1., 1.); + let r = _mm256_testnzc_pd(a, b); + assert_eq!(r, 1); + } + test_mm256_testnzc_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_testz_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testz_pd(a, b); + assert_eq!(r, 1); + let a = _mm_set1_pd(-1.); + let r = _mm_testz_pd(a, a); + assert_eq!(r, 0); + } + test_mm_testz_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_testc_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testc_pd(a, b); + assert_eq!(r, 1); + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(-1.); + let r = _mm_testc_pd(a, b); + assert_eq!(r, 0); + } + test_mm_testc_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_testnzc_pd() { + let a = _mm_setr_pd(1., 2.); + let b = _mm_setr_pd(5., 6.); + let r = _mm_testnzc_pd(a, b); + assert_eq!(r, 0); + let a = _mm_setr_pd(1., -1.); + let b = _mm_setr_pd(-1., -1.); + let r = _mm_testnzc_pd(a, b); + assert_eq!(r, 1); + } + test_mm_testnzc_pd(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testz_ps() { + let a = _mm256_set1_ps(1.); + let r = _mm256_testz_ps(a, a); + assert_eq!(r, 1); + let a = _mm256_set1_ps(-1.); + let r = _mm256_testz_ps(a, a); + assert_eq!(r, 0); + } + test_mm256_testz_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testc_ps() { + let a = _mm256_set1_ps(1.); + let r = _mm256_testc_ps(a, a); + assert_eq!(r, 1); + let b = _mm256_set1_ps(-1.); + let r = _mm256_testc_ps(a, b); + assert_eq!(r, 0); + } + test_mm256_testc_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm256_testnzc_ps() { + let a = _mm256_set1_ps(1.); + let r = _mm256_testnzc_ps(a, a); + assert_eq!(r, 0); + let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.); + let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.); + let r = _mm256_testnzc_ps(a, b); + assert_eq!(r, 1); + } + test_mm256_testnzc_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_testz_ps() { + let a = _mm_set1_ps(1.); + let r = _mm_testz_ps(a, a); + assert_eq!(r, 1); + let a = _mm_set1_ps(-1.); + let r = _mm_testz_ps(a, a); + assert_eq!(r, 0); + } + test_mm_testz_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_testc_ps() { + let a = _mm_set1_ps(1.); + let r = _mm_testc_ps(a, a); + assert_eq!(r, 1); + let b = _mm_set1_ps(-1.); + let r = _mm_testc_ps(a, b); + assert_eq!(r, 0); + } + test_mm_testc_ps(); + + #[target_feature(enable = "avx")] + unsafe fn test_mm_testnzc_ps() { + let a = _mm_set1_ps(1.); + let r = _mm_testnzc_ps(a, a); + assert_eq!(r, 0); + let a = _mm_setr_ps(1., -1., -1., -1.); + let b = _mm_setr_ps(-1., -1., 1., 1.); + let r = _mm_testnzc_ps(a, b); + assert_eq!(r, 1); + } + test_mm_testnzc_ps(); +} + +#[target_feature(enable = "sse2")] +unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { + _mm_set_epi64x(b, a) +} + +#[track_caller] +#[target_feature(enable = "sse")] +unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +} + +#[track_caller] +#[target_feature(enable = "avx")] +unsafe fn assert_eq_m256(a: __m256, b: __m256) { + let cmp = _mm256_cmp_ps::<_CMP_EQ_OQ>(a, b); + if _mm256_movemask_ps(cmp) != 0b11111111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "avx")] +unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) { + let cmp = _mm256_cmp_pd::<_CMP_EQ_OQ>(a, b); + if _mm256_movemask_pd(cmp) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "avx")] +unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { + assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b)) +} + +/// Stores `T` in an unaligned address +struct Unaligned { + buf: Vec, + offset: bool, + _marker: std::marker::PhantomData, +} + +impl Unaligned { + fn new(value: T) -> Self { + // Allocate extra byte for unalignment headroom + let len = std::mem::size_of::(); + let mut buf = Vec::::with_capacity(len + 1); + // Force the address to be a non-multiple of 2, so it is as unaligned as it can get. + let offset = (buf.as_ptr() as usize % 2) == 0; + let value_ptr: *const T = &value; + unsafe { + buf.as_mut_ptr().add(offset.into()).copy_from_nonoverlapping(value_ptr.cast(), len); + } + Self { buf, offset, _marker: std::marker::PhantomData } + } + + fn as_ptr(&self) -> *const T { + unsafe { self.buf.as_ptr().add(self.offset.into()).cast() } + } + + fn as_mut_ptr(&mut self) -> *mut T { + unsafe { self.buf.as_mut_ptr().add(self.offset.into()).cast() } + } + + fn read(&self) -> T { + unsafe { self.as_ptr().read_unaligned() } + } }