Auto merge of #121317 - ChrisDenton:win10-sync, r=Mark-Simulacrum

Always use WaitOnAddress on Win10+

`WaitOnAddress` and `WakeByAddressSingle` are always available since Windows 8 so they can now be used without needing to delay load. I've also moved the Windows 7 thread parking fallbacks into a separate sub-module.
This commit is contained in:
bors 2024-02-26 06:31:30 +00:00
commit dc00e8cdb6
4 changed files with 160 additions and 112 deletions

View file

@ -28,17 +28,14 @@ pub type SIZE_T = usize;
pub type WORD = u16;
pub type CHAR = c_char;
pub type ULONG = c_ulong;
pub type ACCESS_MASK = DWORD;
pub type LPCVOID = *const c_void;
pub type LPHANDLE = *mut HANDLE;
pub type LPOVERLAPPED = *mut OVERLAPPED;
pub type LPSECURITY_ATTRIBUTES = *mut SECURITY_ATTRIBUTES;
pub type LPVOID = *mut c_void;
pub type LPWCH = *mut WCHAR;
pub type LPWSTR = *mut WCHAR;
pub type PLARGE_INTEGER = *mut c_longlong;
pub type PSRWLOCK = *mut SRWLOCK;
pub type socklen_t = c_int;
@ -360,6 +357,19 @@ compat_fn_with_fallback! {
}
}
#[cfg(not(target_vendor = "win7"))]
#[link(name = "synchronization")]
extern "system" {
pub fn WaitOnAddress(
address: *const c_void,
compareaddress: *const c_void,
addresssize: usize,
dwmilliseconds: u32,
) -> BOOL;
pub fn WakeByAddressSingle(address: *const c_void);
}
#[cfg(target_vendor = "win7")]
compat_fn_optional! {
crate::sys::compat::load_synch_functions();
pub fn WaitOnAddress(
@ -371,30 +381,34 @@ compat_fn_optional! {
pub fn WakeByAddressSingle(address: *const ::core::ffi::c_void);
}
#[cfg(any(target_vendor = "win7", target_vendor = "uwp"))]
compat_fn_with_fallback! {
pub static NTDLL: &CStr = c"ntdll";
#[cfg(target_vendor = "win7")]
pub fn NtCreateKeyedEvent(
KeyedEventHandle: LPHANDLE,
DesiredAccess: ACCESS_MASK,
KeyedEventHandle: *mut HANDLE,
DesiredAccess: DWORD,
ObjectAttributes: LPVOID,
Flags: ULONG
) -> NTSTATUS {
panic!("keyed events not available")
}
#[cfg(target_vendor = "win7")]
pub fn NtReleaseKeyedEvent(
EventHandle: HANDLE,
Key: LPVOID,
Alertable: BOOLEAN,
Timeout: PLARGE_INTEGER
Timeout: *mut c_longlong
) -> NTSTATUS {
panic!("keyed events not available")
}
#[cfg(target_vendor = "win7")]
pub fn NtWaitForKeyedEvent(
EventHandle: HANDLE,
Key: LPVOID,
Alertable: BOOLEAN,
Timeout: PLARGE_INTEGER
Timeout: *mut c_longlong
) -> NTSTATUS {
panic!("keyed events not available")
}

View file

@ -21,7 +21,6 @@
use crate::ffi::{c_void, CStr};
use crate::ptr::NonNull;
use crate::sync::atomic::Ordering;
use crate::sys::c;
// This uses a static initializer to preload some imported functions.
@ -38,6 +37,7 @@ use crate::sys::c;
// file an issue for discussion; currently we don't guarantee any functionality
// before main.
// See https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization?view=msvc-170
#[cfg(target_vendor = "win7")]
#[used]
#[link_section = ".CRT$XCT"]
static INIT_TABLE_ENTRY: unsafe extern "C" fn() = init;
@ -52,6 +52,7 @@ static INIT_TABLE_ENTRY: unsafe extern "C" fn() = init;
/// negative performance impact in practical situations.
///
/// Currently we only preload `WaitOnAddress` and `WakeByAddressSingle`.
#[cfg(target_vendor = "win7")]
unsafe extern "C" fn init() {
// In an exe this code is executed before main() so is single threaded.
// In a DLL the system's loader lock will be held thereby synchronizing
@ -183,6 +184,7 @@ macro_rules! compat_fn_with_fallback {
func($($argname),*)
}
}
#[allow(unused)]
$(#[$meta])*
$vis use $symbol::call as $symbol;
)*)
@ -191,6 +193,7 @@ macro_rules! compat_fn_with_fallback {
/// Optionally loaded functions.
///
/// Actual loading of the function defers to $load_functions.
#[cfg(target_vendor = "win7")]
macro_rules! compat_fn_optional {
($load_functions:expr;
$(
@ -218,13 +221,19 @@ macro_rules! compat_fn_optional {
NonNull::new(PTR.load(Ordering::Relaxed)).map(|f| unsafe { mem::transmute(f) })
}
}
#[inline]
pub unsafe extern "system" fn $symbol($($argname: $argtype),*) $(-> $rettype)? {
$symbol::option().unwrap()($($argname),*)
}
)+
)
}
/// Load all needed functions from "api-ms-win-core-synch-l1-2-0".
#[cfg(target_vendor = "win7")]
pub(super) fn load_synch_functions() {
fn try_load() -> Option<()> {
use crate::sync::atomic::Ordering;
const MODULE_NAME: &CStr = c"api-ms-win-core-synch-l1-2-0";
const WAIT_ON_ADDRESS: &CStr = c"WaitOnAddress";
const WAKE_BY_ADDRESS_SINGLE: &CStr = c"WakeByAddressSingle";

View file

@ -58,10 +58,9 @@
// [4]: Windows Internals, Part 1, ISBN 9780735671300
use crate::pin::Pin;
use crate::ptr;
use crate::sync::atomic::{
AtomicI8, AtomicPtr,
Ordering::{Acquire, Relaxed, Release},
AtomicI8,
Ordering::{Acquire, Release},
};
use crate::sys::{c, dur2timeout};
use crate::time::Duration;
@ -111,26 +110,21 @@ impl Parker {
return;
}
if let Some(wait_on_address) = c::WaitOnAddress::option() {
loop {
// Wait for something to happen, assuming it's still set to PARKED.
wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE);
// Change NOTIFIED=>EMPTY but leave PARKED alone.
if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() {
// Actually woken up by unpark().
return;
} else {
// Spurious wake up. We loop to try again.
}
#[cfg(target_vendor = "win7")]
if c::WaitOnAddress::option().is_none() {
return keyed_events::park(self);
}
loop {
// Wait for something to happen, assuming it's still set to PARKED.
c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE);
// Change NOTIFIED=>EMPTY but leave PARKED alone.
if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() {
// Actually woken up by unpark().
return;
} else {
// Spurious wake up. We loop to try again.
}
} else {
// Wait for unpark() to produce this event.
c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
// Set the state back to EMPTY (from either PARKED or NOTIFIED).
// Note that we don't just write EMPTY, but use swap() to also
// include an acquire-ordered read to synchronize with unpark()'s
// release-ordered write.
self.state.swap(EMPTY, Acquire);
}
}
@ -144,47 +138,23 @@ impl Parker {
return;
}
if let Some(wait_on_address) = c::WaitOnAddress::option() {
// Wait for something to happen, assuming it's still set to PARKED.
wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout));
// Set the state back to EMPTY (from either PARKED or NOTIFIED).
// Note that we don't just write EMPTY, but use swap() to also
// include an acquire-ordered read to synchronize with unpark()'s
// release-ordered write.
if self.state.swap(EMPTY, Acquire) == NOTIFIED {
// Actually woken up by unpark().
} else {
// Timeout or spurious wake up.
// We return either way, because we can't easily tell if it was the
// timeout or not.
}
#[cfg(target_vendor = "win7")]
if c::WaitOnAddress::option().is_none() {
return keyed_events::park_timeout(self, timeout);
}
// Wait for something to happen, assuming it's still set to PARKED.
c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout));
// Set the state back to EMPTY (from either PARKED or NOTIFIED).
// Note that we don't just write EMPTY, but use swap() to also
// include an acquire-ordered read to synchronize with unpark()'s
// release-ordered write.
if self.state.swap(EMPTY, Acquire) == NOTIFIED {
// Actually woken up by unpark().
} else {
// Need to wait for unpark() using NtWaitForKeyedEvent.
let handle = keyed_event_handle();
// NtWaitForKeyedEvent uses a unit of 100ns, and uses negative
// values to indicate a relative time on the monotonic clock.
// This is documented here for the underlying KeWaitForSingleObject function:
// https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject
let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) {
Ok(t) => -t,
Err(_) => i64::MIN,
};
// Wait for unpark() to produce this event.
let unparked =
c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS;
// Set the state back to EMPTY (from either PARKED or NOTIFIED).
let prev_state = self.state.swap(EMPTY, Acquire);
if !unparked && prev_state == NOTIFIED {
// We were awoken by a timeout, not by unpark(), but the state
// was set to NOTIFIED, which means we *just* missed an
// unpark(), which is now blocked on us to wait for it.
// Wait for it to consume the event and unblock that thread.
c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut());
}
// Timeout or spurious wake up.
// We return either way, because we can't easily tell if it was the
// timeout or not.
}
}
@ -198,18 +168,11 @@ impl Parker {
// with park().
if self.state.swap(NOTIFIED, Release) == PARKED {
unsafe {
if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() {
wake_by_address_single(self.ptr());
} else {
// If we run NtReleaseKeyedEvent before the waiting thread runs
// NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
// If the waiting thread wakes up before we run NtReleaseKeyedEvent
// (e.g. due to a timeout), this blocks until we do wake up a thread.
// To prevent this thread from blocking indefinitely in that case,
// park_impl() will, after seeing the state set to NOTIFIED after
// waking up, call NtWaitForKeyedEvent again to unblock us.
c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut());
#[cfg(target_vendor = "win7")]
if c::WakeByAddressSingle::option().is_none() {
return keyed_events::unpark(self);
}
c::WakeByAddressSingle(self.ptr());
}
}
}
@ -219,35 +182,97 @@ impl Parker {
}
}
fn keyed_event_handle() -> c::HANDLE {
const INVALID: c::HANDLE = ptr::without_provenance_mut(!0);
static HANDLE: AtomicPtr<crate::ffi::c_void> = AtomicPtr::new(INVALID);
match HANDLE.load(Relaxed) {
INVALID => {
let mut handle = c::INVALID_HANDLE_VALUE;
unsafe {
match c::NtCreateKeyedEvent(
&mut handle,
c::GENERIC_READ | c::GENERIC_WRITE,
ptr::null_mut(),
0,
) {
c::STATUS_SUCCESS => {}
r => panic!("Unable to create keyed event handle: error {r}"),
}
}
match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) {
Ok(_) => handle,
Err(h) => {
// Lost the race to another thread initializing HANDLE before we did.
// Closing our handle and using theirs instead.
unsafe {
c::CloseHandle(handle);
}
h
}
}
#[cfg(target_vendor = "win7")]
mod keyed_events {
use super::{Parker, EMPTY, NOTIFIED};
use crate::sys::c;
use core::pin::Pin;
use core::ptr;
use core::sync::atomic::{
AtomicPtr,
Ordering::{Acquire, Relaxed},
};
use core::time::Duration;
pub unsafe fn park(parker: Pin<&Parker>) {
// Wait for unpark() to produce this event.
c::NtWaitForKeyedEvent(keyed_event_handle(), parker.ptr(), 0, ptr::null_mut());
// Set the state back to EMPTY (from either PARKED or NOTIFIED).
// Note that we don't just write EMPTY, but use swap() to also
// include an acquire-ordered read to synchronize with unpark()'s
// release-ordered write.
parker.state.swap(EMPTY, Acquire);
return;
}
pub unsafe fn park_timeout(parker: Pin<&Parker>, timeout: Duration) {
// Need to wait for unpark() using NtWaitForKeyedEvent.
let handle = keyed_event_handle();
// NtWaitForKeyedEvent uses a unit of 100ns, and uses negative
// values to indicate a relative time on the monotonic clock.
// This is documented here for the underlying KeWaitForSingleObject function:
// https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject
let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) {
Ok(t) => -t,
Err(_) => i64::MIN,
};
// Wait for unpark() to produce this event.
let unparked =
c::NtWaitForKeyedEvent(handle, parker.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS;
// Set the state back to EMPTY (from either PARKED or NOTIFIED).
let prev_state = parker.state.swap(EMPTY, Acquire);
if !unparked && prev_state == NOTIFIED {
// We were awoken by a timeout, not by unpark(), but the state
// was set to NOTIFIED, which means we *just* missed an
// unpark(), which is now blocked on us to wait for it.
// Wait for it to consume the event and unblock that thread.
c::NtWaitForKeyedEvent(handle, parker.ptr(), 0, ptr::null_mut());
}
}
pub unsafe fn unpark(parker: Pin<&Parker>) {
// If we run NtReleaseKeyedEvent before the waiting thread runs
// NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up.
// If the waiting thread wakes up before we run NtReleaseKeyedEvent
// (e.g. due to a timeout), this blocks until we do wake up a thread.
// To prevent this thread from blocking indefinitely in that case,
// park_impl() will, after seeing the state set to NOTIFIED after
// waking up, call NtWaitForKeyedEvent again to unblock us.
c::NtReleaseKeyedEvent(keyed_event_handle(), parker.ptr(), 0, ptr::null_mut());
}
fn keyed_event_handle() -> c::HANDLE {
const INVALID: c::HANDLE = ptr::without_provenance_mut(!0);
static HANDLE: AtomicPtr<crate::ffi::c_void> = AtomicPtr::new(INVALID);
match HANDLE.load(Relaxed) {
INVALID => {
let mut handle = c::INVALID_HANDLE_VALUE;
unsafe {
match c::NtCreateKeyedEvent(
&mut handle,
c::GENERIC_READ | c::GENERIC_WRITE,
ptr::null_mut(),
0,
) {
c::STATUS_SUCCESS => {}
r => panic!("Unable to create keyed event handle: error {r}"),
}
}
match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) {
Ok(_) => handle,
Err(h) => {
// Lost the race to another thread initializing HANDLE before we did.
// Closing our handle and using theirs instead.
unsafe {
c::CloseHandle(handle);
}
h
}
}
}
handle => handle,
}
handle => handle,
}
}

View file

@ -134,9 +134,9 @@ endif
# Extra flags needed to compile a working executable with the standard library
ifdef IS_WINDOWS
ifdef IS_MSVC
EXTRACFLAGS := ws2_32.lib userenv.lib advapi32.lib bcrypt.lib ntdll.lib
EXTRACFLAGS := ws2_32.lib userenv.lib advapi32.lib bcrypt.lib ntdll.lib synchronization.lib
else
EXTRACFLAGS := -lws2_32 -luserenv -lbcrypt -lntdll
EXTRACFLAGS := -lws2_32 -luserenv -lbcrypt -lntdll -lsynchronization
EXTRACXXFLAGS := -lstdc++
# So this is a bit hacky: we can't use the DLL version of libstdc++ because
# it pulls in the DLL version of libgcc, which means that we end up with 2