From e41240e45b58ea1bcf42fe5503cafec4c9dca1a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Tue, 22 Aug 2023 02:36:41 +0200 Subject: [PATCH 1/2] Fix races conditions with `SyntaxContext` decoding --- .../src/sync/worker_local.rs | 6 + compiler/rustc_span/src/hygiene.rs | 114 ++++++++++++------ 2 files changed, 86 insertions(+), 34 deletions(-) diff --git a/compiler/rustc_data_structures/src/sync/worker_local.rs b/compiler/rustc_data_structures/src/sync/worker_local.rs index 8c84daf4f16..27d687c3cfe 100644 --- a/compiler/rustc_data_structures/src/sync/worker_local.rs +++ b/compiler/rustc_data_structures/src/sync/worker_local.rs @@ -171,3 +171,9 @@ impl Deref for WorkerLocal { unsafe { &self.locals.get_unchecked(self.registry.id().verify()).0 } } } + +impl Default for WorkerLocal { + fn default() -> Self { + WorkerLocal::new(|_| T::default()) + } +} diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 9f2ff437842..37ec2f0801c 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -34,11 +34,13 @@ use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::stable_hasher::HashingControls; use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher}; -use rustc_data_structures::sync::{Lock, Lrc}; +use rustc_data_structures::sync::{Lock, Lrc, WorkerLocal}; use rustc_data_structures::unhash::UnhashMap; use rustc_index::IndexVec; use rustc_macros::HashStable_Generic; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::cell::RefCell; +use std::collections::hash_map::Entry; use std::fmt; use std::hash::Hash; @@ -1241,13 +1243,25 @@ impl HygieneEncodeContext { #[derive(Default)] /// Additional information used to assist in decoding hygiene data -pub struct HygieneDecodeContext { +struct HygieneDecodeContextInner { // Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current // global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create // a new id in the global `HygieneData`. This map tracks the ID we end up picking, // so that multiple occurrences of the same serialized id are decoded to the same - // `SyntaxContext` - remapped_ctxts: Lock>>, + // `SyntaxContext`. This only stores `SyntaxContext`s which are completly decoded. + remapped_ctxts: Vec>, + + /// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`. + decoding: FxHashMap, +} + +#[derive(Default)] +/// Additional information used to assist in decoding hygiene data +pub struct HygieneDecodeContext { + inner: Lock, + + /// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread. + local_in_progress: WorkerLocal>>, } /// Register an expansion which has been decoded from the on-disk-cache for the local crate. @@ -1331,38 +1345,56 @@ pub fn decode_syntax_context SyntaxContext return SyntaxContext::root(); } - let outer_ctxts = &context.remapped_ctxts; + let ctxt = { + let mut inner = context.inner.lock(); - // Ensure that the lock() temporary is dropped early - { - if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() { + if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() { + // This has already beeen decoded. return ctxt; } - } - // Allocate and store SyntaxContext id *before* calling the decoder function, - // as the SyntaxContextData may reference itself. - let new_ctxt = HygieneData::with(|hygiene_data| { - let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32); - // Push a dummy SyntaxContextData to ensure that nobody else can get the - // same ID as us. This will be overwritten after call `decode_Data` - hygiene_data.syntax_context_data.push(SyntaxContextData { - outer_expn: ExpnId::root(), - outer_transparency: Transparency::Transparent, - parent: SyntaxContext::root(), - opaque: SyntaxContext::root(), - opaque_and_semitransparent: SyntaxContext::root(), - dollar_crate_name: kw::Empty, - }); - let mut ctxts = outer_ctxts.lock(); - let new_len = raw_id as usize + 1; - if ctxts.len() < new_len { - ctxts.resize(new_len, None); + match inner.decoding.entry(raw_id) { + Entry::Occupied(ctxt_entry) => { + match context.local_in_progress.borrow_mut().entry(raw_id) { + Entry::Occupied(..) => { + // We're decoding this already on the current thread. Return here + // and let the function higher up the stack finish decoding to handle + // recursive cases. + return *ctxt_entry.get(); + } + Entry::Vacant(entry) => { + entry.insert(()); + + // Some other thread is current decoding this. Race with it. + *ctxt_entry.get() + } + } + } + Entry::Vacant(entry) => { + // We are the first thread to start decoding. Mark the current thread as being progress. + context.local_in_progress.borrow_mut().insert(raw_id, ()); + + // Allocate and store SyntaxContext id *before* calling the decoder function, + // as the SyntaxContextData may reference itself. + let new_ctxt = HygieneData::with(|hygiene_data| { + let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32); + // Push a dummy SyntaxContextData to ensure that nobody else can get the + // same ID as us. This will be overwritten after call `decode_Data` + hygiene_data.syntax_context_data.push(SyntaxContextData { + outer_expn: ExpnId::root(), + outer_transparency: Transparency::Transparent, + parent: SyntaxContext::root(), + opaque: SyntaxContext::root(), + opaque_and_semitransparent: SyntaxContext::root(), + dollar_crate_name: kw::Empty, + }); + new_ctxt + }); + entry.insert(new_ctxt); + new_ctxt + } } - ctxts[raw_id as usize] = Some(new_ctxt); - drop(ctxts); - new_ctxt - }); + }; // Don't try to decode data while holding the lock, since we need to // be able to recursively decode a SyntaxContext @@ -1375,14 +1407,28 @@ pub fn decode_syntax_context SyntaxContext // Overwrite the dummy data with our decoded SyntaxContextData HygieneData::with(|hygiene_data| { let dummy = std::mem::replace( - &mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize], + &mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize], ctxt_data, ); // Make sure nothing weird happening while `decode_data` was running - assert_eq!(dummy.dollar_crate_name, kw::Empty); + if cfg!(not(parallel_compiler)) { + assert_eq!(dummy.dollar_crate_name, kw::Empty); + } }); - new_ctxt + // Mark the context as completed + + context.local_in_progress.borrow_mut().remove(&raw_id); + + let mut inner = context.inner.lock(); + let new_len = raw_id as usize + 1; + if inner.remapped_ctxts.len() < new_len { + inner.remapped_ctxts.resize(new_len, None); + } + inner.remapped_ctxts[raw_id as usize] = Some(ctxt); + inner.decoding.remove(&raw_id); + + ctxt } fn for_all_ctxts_in( From 246a6a6589b118c0a08e449f981bb4ae07bc089a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 23 Aug 2023 20:16:22 +0200 Subject: [PATCH 2/2] Extend comment on assertion --- compiler/rustc_span/src/hygiene.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 37ec2f0801c..ab57d5c6803 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -1410,8 +1410,12 @@ pub fn decode_syntax_context SyntaxContext &mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize], ctxt_data, ); - // Make sure nothing weird happening while `decode_data` was running if cfg!(not(parallel_compiler)) { + // Make sure nothing weird happened while `decode_data` was running. + // We used `kw::Empty` for the dummy value and we expect nothing to be + // modifying the dummy entry. + // This does not hold for the parallel compiler as another thread may + // have inserted the fully decoded data. assert_eq!(dummy.dollar_crate_name, kw::Empty); } });