Auto merge of #115082 - Zoxc:syntax-context-decode-race, r=cjgillot

Fix races conditions with `SyntaxContext` decoding

This changes `SyntaxContext` decoding to work with concurrent decoding. The `remapped_ctxts` field now only stores `SyntaxContext` which have completed decoding, while the new `decoding` and `local_in_progress` keeps track of `SyntaxContext`s which are in process of being decoding and on which threads.

This fixes 2 issues with the current implementation. It can return an `SyntaxContext` which contains dummy data if another thread starts decoding before the first one has completely finished. Multiple threads could also allocate multiple `SyntaxContext`s for the same `raw_id`.
This commit is contained in:
bors 2023-08-24 17:43:02 +00:00
commit b60e31b673
2 changed files with 91 additions and 35 deletions

View file

@ -171,3 +171,9 @@ impl<T> Deref for WorkerLocal<T> {
unsafe { &self.locals.get_unchecked(self.registry.id().verify()).0 }
}
}
impl<T: Default> Default for WorkerLocal<T> {
fn default() -> Self {
WorkerLocal::new(|_| T::default())
}
}

View file

@ -34,11 +34,13 @@ use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::stable_hasher::HashingControls;
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_data_structures::sync::{Lock, Lrc};
use rustc_data_structures::sync::{Lock, Lrc, WorkerLocal};
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::IndexVec;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::fmt;
use std::hash::Hash;
@ -1241,13 +1243,25 @@ impl HygieneEncodeContext {
#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
pub struct HygieneDecodeContext {
struct HygieneDecodeContextInner {
// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
// so that multiple occurrences of the same serialized id are decoded to the same
// `SyntaxContext`
remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>,
// `SyntaxContext`. This only stores `SyntaxContext`s which are completly decoded.
remapped_ctxts: Vec<Option<SyntaxContext>>,
/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
decoding: FxHashMap<u32, SyntaxContext>,
}
#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
pub struct HygieneDecodeContext {
inner: Lock<HygieneDecodeContextInner>,
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
local_in_progress: WorkerLocal<RefCell<FxHashMap<u32, ()>>>,
}
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
@ -1331,38 +1345,56 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
return SyntaxContext::root();
}
let outer_ctxts = &context.remapped_ctxts;
let ctxt = {
let mut inner = context.inner.lock();
// Ensure that the lock() temporary is dropped early
{
if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() {
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
// This has already beeen decoded.
return ctxt;
}
}
// Allocate and store SyntaxContext id *before* calling the decoder function,
// as the SyntaxContextData may reference itself.
let new_ctxt = HygieneData::with(|hygiene_data| {
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
// Push a dummy SyntaxContextData to ensure that nobody else can get the
// same ID as us. This will be overwritten after call `decode_Data`
hygiene_data.syntax_context_data.push(SyntaxContextData {
outer_expn: ExpnId::root(),
outer_transparency: Transparency::Transparent,
parent: SyntaxContext::root(),
opaque: SyntaxContext::root(),
opaque_and_semitransparent: SyntaxContext::root(),
dollar_crate_name: kw::Empty,
});
let mut ctxts = outer_ctxts.lock();
let new_len = raw_id as usize + 1;
if ctxts.len() < new_len {
ctxts.resize(new_len, None);
match inner.decoding.entry(raw_id) {
Entry::Occupied(ctxt_entry) => {
match context.local_in_progress.borrow_mut().entry(raw_id) {
Entry::Occupied(..) => {
// We're decoding this already on the current thread. Return here
// and let the function higher up the stack finish decoding to handle
// recursive cases.
return *ctxt_entry.get();
}
Entry::Vacant(entry) => {
entry.insert(());
// Some other thread is current decoding this. Race with it.
*ctxt_entry.get()
}
}
}
Entry::Vacant(entry) => {
// We are the first thread to start decoding. Mark the current thread as being progress.
context.local_in_progress.borrow_mut().insert(raw_id, ());
// Allocate and store SyntaxContext id *before* calling the decoder function,
// as the SyntaxContextData may reference itself.
let new_ctxt = HygieneData::with(|hygiene_data| {
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
// Push a dummy SyntaxContextData to ensure that nobody else can get the
// same ID as us. This will be overwritten after call `decode_Data`
hygiene_data.syntax_context_data.push(SyntaxContextData {
outer_expn: ExpnId::root(),
outer_transparency: Transparency::Transparent,
parent: SyntaxContext::root(),
opaque: SyntaxContext::root(),
opaque_and_semitransparent: SyntaxContext::root(),
dollar_crate_name: kw::Empty,
});
new_ctxt
});
entry.insert(new_ctxt);
new_ctxt
}
}
ctxts[raw_id as usize] = Some(new_ctxt);
drop(ctxts);
new_ctxt
});
};
// Don't try to decode data while holding the lock, since we need to
// be able to recursively decode a SyntaxContext
@ -1375,14 +1407,32 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
// Overwrite the dummy data with our decoded SyntaxContextData
HygieneData::with(|hygiene_data| {
let dummy = std::mem::replace(
&mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize],
&mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize],
ctxt_data,
);
// Make sure nothing weird happening while `decode_data` was running
assert_eq!(dummy.dollar_crate_name, kw::Empty);
if cfg!(not(parallel_compiler)) {
// Make sure nothing weird happened while `decode_data` was running.
// We used `kw::Empty` for the dummy value and we expect nothing to be
// modifying the dummy entry.
// This does not hold for the parallel compiler as another thread may
// have inserted the fully decoded data.
assert_eq!(dummy.dollar_crate_name, kw::Empty);
}
});
new_ctxt
// Mark the context as completed
context.local_in_progress.borrow_mut().remove(&raw_id);
let mut inner = context.inner.lock();
let new_len = raw_id as usize + 1;
if inner.remapped_ctxts.len() < new_len {
inner.remapped_ctxts.resize(new_len, None);
}
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
inner.decoding.remove(&raw_id);
ctxt
}
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(