Skip to content

Commit b60e31b

Browse files
committed
Auto merge of rust-lang#115082 - Zoxc:syntax-context-decode-race, r=cjgillot
Fix races conditions with `SyntaxContext` decoding This changes `SyntaxContext` decoding to work with concurrent decoding. The `remapped_ctxts` field now only stores `SyntaxContext` which have completed decoding, while the new `decoding` and `local_in_progress` keeps track of `SyntaxContext`s which are in process of being decoding and on which threads. This fixes 2 issues with the current implementation. It can return an `SyntaxContext` which contains dummy data if another thread starts decoding before the first one has completely finished. Multiple threads could also allocate multiple `SyntaxContext`s for the same `raw_id`.
2 parents aa5dbee + 246a6a6 commit b60e31b

File tree

2 files changed

+91
-35
lines changed

2 files changed

+91
-35
lines changed

compiler/rustc_data_structures/src/sync/worker_local.rs

+6
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,9 @@ impl<T> Deref for WorkerLocal<T> {
171171
unsafe { &self.locals.get_unchecked(self.registry.id().verify()).0 }
172172
}
173173
}
174+
175+
impl<T: Default> Default for WorkerLocal<T> {
176+
fn default() -> Self {
177+
WorkerLocal::new(|_| T::default())
178+
}
179+
}

compiler/rustc_span/src/hygiene.rs

+85-35
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,13 @@ use rustc_data_structures::fingerprint::Fingerprint;
3434
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
3535
use rustc_data_structures::stable_hasher::HashingControls;
3636
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
37-
use rustc_data_structures::sync::{Lock, Lrc};
37+
use rustc_data_structures::sync::{Lock, Lrc, WorkerLocal};
3838
use rustc_data_structures::unhash::UnhashMap;
3939
use rustc_index::IndexVec;
4040
use rustc_macros::HashStable_Generic;
4141
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
42+
use std::cell::RefCell;
43+
use std::collections::hash_map::Entry;
4244
use std::fmt;
4345
use std::hash::Hash;
4446

@@ -1241,13 +1243,25 @@ impl HygieneEncodeContext {
12411243

12421244
#[derive(Default)]
12431245
/// Additional information used to assist in decoding hygiene data
1244-
pub struct HygieneDecodeContext {
1246+
struct HygieneDecodeContextInner {
12451247
// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
12461248
// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
12471249
// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
12481250
// so that multiple occurrences of the same serialized id are decoded to the same
1249-
// `SyntaxContext`
1250-
remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>,
1251+
// `SyntaxContext`. This only stores `SyntaxContext`s which are completly decoded.
1252+
remapped_ctxts: Vec<Option<SyntaxContext>>,
1253+
1254+
/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
1255+
decoding: FxHashMap<u32, SyntaxContext>,
1256+
}
1257+
1258+
#[derive(Default)]
1259+
/// Additional information used to assist in decoding hygiene data
1260+
pub struct HygieneDecodeContext {
1261+
inner: Lock<HygieneDecodeContextInner>,
1262+
1263+
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
1264+
local_in_progress: WorkerLocal<RefCell<FxHashMap<u32, ()>>>,
12511265
}
12521266

12531267
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
@@ -1331,38 +1345,56 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
13311345
return SyntaxContext::root();
13321346
}
13331347

1334-
let outer_ctxts = &context.remapped_ctxts;
1348+
let ctxt = {
1349+
let mut inner = context.inner.lock();
13351350

1336-
// Ensure that the lock() temporary is dropped early
1337-
{
1338-
if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() {
1351+
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
1352+
// This has already beeen decoded.
13391353
return ctxt;
13401354
}
1341-
}
13421355

1343-
// Allocate and store SyntaxContext id *before* calling the decoder function,
1344-
// as the SyntaxContextData may reference itself.
1345-
let new_ctxt = HygieneData::with(|hygiene_data| {
1346-
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
1347-
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1348-
// same ID as us. This will be overwritten after call `decode_Data`
1349-
hygiene_data.syntax_context_data.push(SyntaxContextData {
1350-
outer_expn: ExpnId::root(),
1351-
outer_transparency: Transparency::Transparent,
1352-
parent: SyntaxContext::root(),
1353-
opaque: SyntaxContext::root(),
1354-
opaque_and_semitransparent: SyntaxContext::root(),
1355-
dollar_crate_name: kw::Empty,
1356-
});
1357-
let mut ctxts = outer_ctxts.lock();
1358-
let new_len = raw_id as usize + 1;
1359-
if ctxts.len() < new_len {
1360-
ctxts.resize(new_len, None);
1356+
match inner.decoding.entry(raw_id) {
1357+
Entry::Occupied(ctxt_entry) => {
1358+
match context.local_in_progress.borrow_mut().entry(raw_id) {
1359+
Entry::Occupied(..) => {
1360+
// We're decoding this already on the current thread. Return here
1361+
// and let the function higher up the stack finish decoding to handle
1362+
// recursive cases.
1363+
return *ctxt_entry.get();
1364+
}
1365+
Entry::Vacant(entry) => {
1366+
entry.insert(());
1367+
1368+
// Some other thread is current decoding this. Race with it.
1369+
*ctxt_entry.get()
1370+
}
1371+
}
1372+
}
1373+
Entry::Vacant(entry) => {
1374+
// We are the first thread to start decoding. Mark the current thread as being progress.
1375+
context.local_in_progress.borrow_mut().insert(raw_id, ());
1376+
1377+
// Allocate and store SyntaxContext id *before* calling the decoder function,
1378+
// as the SyntaxContextData may reference itself.
1379+
let new_ctxt = HygieneData::with(|hygiene_data| {
1380+
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
1381+
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1382+
// same ID as us. This will be overwritten after call `decode_Data`
1383+
hygiene_data.syntax_context_data.push(SyntaxContextData {
1384+
outer_expn: ExpnId::root(),
1385+
outer_transparency: Transparency::Transparent,
1386+
parent: SyntaxContext::root(),
1387+
opaque: SyntaxContext::root(),
1388+
opaque_and_semitransparent: SyntaxContext::root(),
1389+
dollar_crate_name: kw::Empty,
1390+
});
1391+
new_ctxt
1392+
});
1393+
entry.insert(new_ctxt);
1394+
new_ctxt
1395+
}
13611396
}
1362-
ctxts[raw_id as usize] = Some(new_ctxt);
1363-
drop(ctxts);
1364-
new_ctxt
1365-
});
1397+
};
13661398

13671399
// Don't try to decode data while holding the lock, since we need to
13681400
// be able to recursively decode a SyntaxContext
@@ -1375,14 +1407,32 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
13751407
// Overwrite the dummy data with our decoded SyntaxContextData
13761408
HygieneData::with(|hygiene_data| {
13771409
let dummy = std::mem::replace(
1378-
&mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize],
1410+
&mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize],
13791411
ctxt_data,
13801412
);
1381-
// Make sure nothing weird happening while `decode_data` was running
1382-
assert_eq!(dummy.dollar_crate_name, kw::Empty);
1413+
if cfg!(not(parallel_compiler)) {
1414+
// Make sure nothing weird happened while `decode_data` was running.
1415+
// We used `kw::Empty` for the dummy value and we expect nothing to be
1416+
// modifying the dummy entry.
1417+
// This does not hold for the parallel compiler as another thread may
1418+
// have inserted the fully decoded data.
1419+
assert_eq!(dummy.dollar_crate_name, kw::Empty);
1420+
}
13831421
});
13841422

1385-
new_ctxt
1423+
// Mark the context as completed
1424+
1425+
context.local_in_progress.borrow_mut().remove(&raw_id);
1426+
1427+
let mut inner = context.inner.lock();
1428+
let new_len = raw_id as usize + 1;
1429+
if inner.remapped_ctxts.len() < new_len {
1430+
inner.remapped_ctxts.resize(new_len, None);
1431+
}
1432+
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
1433+
inner.decoding.remove(&raw_id);
1434+
1435+
ctxt
13861436
}
13871437

13881438
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(

0 commit comments

Comments
 (0)