Skip to content

Commit 9f79d2f

Browse files
committed
Auto merge of #50997 - michaelwoerister:pre-analyze-filemaps, r=Mark-Simulacrum
Make FileMap::{lines, multibyte_chars, non_narrow_chars} non-mutable. This PR removes most of the interior mutability from `FileMap`, which should be beneficial, especially in a multithreaded setting. This is achieved by initializing the state in question when the filemap is constructed instead of during lexing. Hopefully this doesn't degrade performance. cc @wesleywiser
2 parents d84ad59 + a1f8a6c commit 9f79d2f

File tree

13 files changed

+553
-232
lines changed

13 files changed

+553
-232
lines changed

src/Cargo.lock

+1
Original file line number | Diff line number | Diff line change
@@ -2779,6 +2779,7 @@ name = "syntax_pos"
27792779
version = "0.0.0"
27802780
dependencies = [
27812781
"arena 0.0.0",
2782+
"cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
27822783
"rustc_data_structures 0.0.0",
27832784
"scoped-tls 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
27842785
"serialize 0.0.0",

src/librustc/ich/impls_syntax.rs

+12-18
Original file line number | Diff line number | Diff line change
@@ -456,27 +456,21 @@ impl<'a> HashStable<StableHashingContext<'a>> for FileMap {
456456
src_hash.hash_stable(hcx, hasher);
457457

458458
// We only hash the relative position within this filemap
459-
lines.with_lock(|lines| {
460-
lines.len().hash_stable(hcx, hasher);
461-
for &line in lines.iter() {
462-
stable_byte_pos(line, start_pos).hash_stable(hcx, hasher);
463-
}
464-
});
459+
lines.len().hash_stable(hcx, hasher);
460+
for &line in lines.iter() {
461+
stable_byte_pos(line, start_pos).hash_stable(hcx, hasher);
462+
}
465463

466464
// We only hash the relative position within this filemap
467-
multibyte_chars.with_lock(|multibyte_chars| {
468-
multibyte_chars.len().hash_stable(hcx, hasher);
469-
for &char_pos in multibyte_chars.iter() {
470-
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
471-
}
472-
});
465+
multibyte_chars.len().hash_stable(hcx, hasher);
466+
for &char_pos in multibyte_chars.iter() {
467+
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
468+
}
473469

474-
non_narrow_chars.with_lock(|non_narrow_chars| {
475-
non_narrow_chars.len().hash_stable(hcx, hasher);
476-
for &char_pos in non_narrow_chars.iter() {
477-
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
478-
}
479-
});
470+
non_narrow_chars.len().hash_stable(hcx, hasher);
471+
for &char_pos in non_narrow_chars.iter() {
472+
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
473+
}
480474
}
481475
}
482476

src/librustc/ty/query/on_disk_cache.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -623,7 +623,7 @@ impl<'a, 'tcx, 'x> SpecializedDecoder<Span> for CacheDecoder<'a, 'tcx, 'x> {
623623
let len = BytePos::decode(self)?;
624624

625625
let file_lo = self.file_index_to_file(file_lo_index);
626-
let lo = file_lo.lines.borrow()[line_lo - 1] + col_lo;
626+
let lo = file_lo.lines[line_lo - 1] + col_lo;
627627
let hi = lo + len;
628628

629629
let expn_info_tag = u8::decode(self)?;

src/librustc_metadata/decoder.rs

+3-6
Original file line number | Diff line number | Diff line change
@@ -1138,9 +1138,9 @@ impl<'a, 'tcx> CrateMetadata {
11381138
src_hash,
11391139
start_pos,
11401140
end_pos,
1141-
lines,
1142-
multibyte_chars,
1143-
non_narrow_chars,
1141+
mut lines,
1142+
mut multibyte_chars,
1143+
mut non_narrow_chars,
11441144
name_hash,
11451145
.. } = filemap_to_import;
11461146

@@ -1151,15 +1151,12 @@ impl<'a, 'tcx> CrateMetadata {
11511151
// `CodeMap::new_imported_filemap()` will then translate those
11521152
// coordinates to their new global frame of reference when the
11531153
// offset of the FileMap is known.
1154-
let mut lines = lines.into_inner();
11551154
for pos in &mut lines {
11561155
*pos = *pos - start_pos;
11571156
}
1158-
let mut multibyte_chars = multibyte_chars.into_inner();
11591157
for mbc in &mut multibyte_chars {
11601158
mbc.pos = mbc.pos - start_pos;
11611159
}
1162-
let mut non_narrow_chars = non_narrow_chars.into_inner();
11631160
for swc in &mut non_narrow_chars {
11641161
*swc = *swc - start_pos;
11651162
}

src/libsyntax/codemap.rs

+32-99
Original file line number | Diff line number | Diff line change
@@ -211,8 +211,7 @@ impl CodeMap {
211211
}
212212
}
213213

214-
/// Creates a new filemap without setting its line information. If you don't
215-
/// intend to set the line information yourself, you should use new_filemap_and_lines.
214+
/// Creates a new filemap.
216215
/// This does not ensure that only one FileMap exists per file name.
217216
pub fn new_filemap(&self, filename: FileName, src: String) -> Lrc<FileMap> {
218217
let start_pos = self.next_start_pos();
@@ -247,22 +246,6 @@ impl CodeMap {
247246
filemap
248247
}
249248

250-
/// Creates a new filemap and sets its line information.
251-
/// This does not ensure that only one FileMap exists per file name.
252-
pub fn new_filemap_and_lines(&self, filename: &Path, src: &str) -> Lrc<FileMap> {
253-
let fm = self.new_filemap(filename.to_owned().into(), src.to_owned());
254-
let mut byte_pos: u32 = fm.start_pos.0;
255-
for line in src.lines() {
256-
// register the start of this line
257-
fm.next_line(BytePos(byte_pos));
258-
259-
// update byte_pos to include this line and the \n at the end
260-
byte_pos += line.len() as u32 + 1;
261-
}
262-
fm
263-
}
264-
265-
266249
/// Allocates a new FileMap representing a source file from an external
267250
/// crate. The source code of such an "imported filemap" is not available,
268251
/// but we still know enough to generate accurate debuginfo location
@@ -305,9 +288,9 @@ impl CodeMap {
305288
external_src: Lock::new(ExternalSource::AbsentOk),
306289
start_pos,
307290
end_pos,
308-
lines: Lock::new(file_local_lines),
309-
multibyte_chars: Lock::new(file_local_multibyte_chars),
310-
non_narrow_chars: Lock::new(file_local_non_narrow_chars),
291+
lines: file_local_lines,
292+
multibyte_chars: file_local_multibyte_chars,
293+
non_narrow_chars: file_local_non_narrow_chars,
311294
name_hash,
312295
});
313296

@@ -345,21 +328,22 @@ impl CodeMap {
345328
match self.lookup_line(pos) {
346329
Ok(FileMapAndLine { fm: f, line: a }) => {
347330
let line = a + 1; // Line numbers start at 1
348-
let linebpos = (*f.lines.borrow())[a];
331+
let linebpos = f.lines[a];
349332
let linechpos = self.bytepos_to_file_charpos(linebpos);
350333
let col = chpos - linechpos;
351334

352335
let col_display = {
353-
let non_narrow_chars = f.non_narrow_chars.borrow();
354-
let start_width_idx = non_narrow_chars
336+
let start_width_idx = f
337+
.non_narrow_chars
355338
.binary_search_by_key(&linebpos, |x| x.pos())
356339
.unwrap_or_else(|x| x);
357-
let end_width_idx = non_narrow_chars
340+
let end_width_idx = f
341+
.non_narrow_chars
358342
.binary_search_by_key(&pos, |x| x.pos())
359343
.unwrap_or_else(|x| x);
360344
let special_chars = end_width_idx - start_width_idx;
361-
let non_narrow: usize =
362-
non_narrow_chars[start_width_idx..end_width_idx]
345+
let non_narrow: usize = f
346+
.non_narrow_chars[start_width_idx..end_width_idx]
363347
.into_iter()
364348
.map(|x| x.width())
365349
.sum();
@@ -380,12 +364,12 @@ impl CodeMap {
380364
}
381365
Err(f) => {
382366
let col_display = {
383-
let non_narrow_chars = f.non_narrow_chars.borrow();
384-
let end_width_idx = non_narrow_chars
367+
let end_width_idx = f
368+
.non_narrow_chars
385369
.binary_search_by_key(&pos, |x| x.pos())
386370
.unwrap_or_else(|x| x);
387-
let non_narrow: usize =
388-
non_narrow_chars[0..end_width_idx]
371+
let non_narrow: usize = f
372+
.non_narrow_chars[0..end_width_idx]
389373
.into_iter()
390374
.map(|x| x.width())
391375
.sum();
@@ -830,22 +814,22 @@ impl CodeMap {
830814
// The number of extra bytes due to multibyte chars in the FileMap
831815
let mut total_extra_bytes = 0;
832816

833-
for mbc in map.multibyte_chars.borrow().iter() {
817+
for mbc in map.multibyte_chars.iter() {
834818
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
835819
if mbc.pos < bpos {
836820
// every character is at least one byte, so we only
837821
// count the actual extra bytes.
838-
total_extra_bytes += mbc.bytes - 1;
822+
total_extra_bytes += mbc.bytes as u32 - 1;
839823
// We should never see a byte position in the middle of a
840824
// character
841-
assert!(bpos.to_usize() >= mbc.pos.to_usize() + mbc.bytes);
825+
assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
842826
} else {
843827
break;
844828
}
845829
}
846830

847-
assert!(map.start_pos.to_usize() + total_extra_bytes <= bpos.to_usize());
848-
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes)
831+
assert!(map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
832+
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize)
849833
}
850834

851835
// Return the index of the filemap (in self.files) which contains pos.
@@ -1028,51 +1012,16 @@ impl FilePathMapping {
10281012
#[cfg(test)]
10291013
mod tests {
10301014
use super::*;
1031-
use std::borrow::Cow;
10321015
use rustc_data_structures::sync::Lrc;
10331016

1034-
#[test]
1035-
fn t1 () {
1036-
let cm = CodeMap::new(FilePathMapping::empty());
1037-
let fm = cm.new_filemap(PathBuf::from("blork.rs").into(),
1038-
"first line.\nsecond line".to_string());
1039-
fm.next_line(BytePos(0));
1040-
// Test we can get lines with partial line info.
1041-
assert_eq!(fm.get_line(0), Some(Cow::from("first line.")));
1042-
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
1043-
fm.next_line(BytePos(10));
1044-
assert_eq!(fm.get_line(1), Some(Cow::from(".")));
1045-
fm.next_line(BytePos(12));
1046-
assert_eq!(fm.get_line(2), Some(Cow::from("second line")));
1047-
}
1048-
1049-
#[test]
1050-
#[should_panic]
1051-
fn t2 () {
1052-
let cm = CodeMap::new(FilePathMapping::empty());
1053-
let fm = cm.new_filemap(PathBuf::from("blork.rs").into(),
1054-
"first line.\nsecond line".to_string());
1055-
// TESTING *REALLY* BROKEN BEHAVIOR:
1056-
fm.next_line(BytePos(0));
1057-
fm.next_line(BytePos(10));
1058-
fm.next_line(BytePos(2));
1059-
}
1060-
10611017
fn init_code_map() -> CodeMap {
10621018
let cm = CodeMap::new(FilePathMapping::empty());
1063-
let fm1 = cm.new_filemap(PathBuf::from("blork.rs").into(),
1064-
"first line.\nsecond line".to_string());
1065-
let fm2 = cm.new_filemap(PathBuf::from("empty.rs").into(),
1066-
"".to_string());
1067-
let fm3 = cm.new_filemap(PathBuf::from("blork2.rs").into(),
1068-
"first line.\nsecond line".to_string());
1069-
1070-
fm1.next_line(BytePos(0));
1071-
fm1.next_line(BytePos(12));
1072-
fm2.next_line(fm2.start_pos);
1073-
fm3.next_line(fm3.start_pos);
1074-
fm3.next_line(fm3.start_pos + BytePos(12));
1075-
1019+
cm.new_filemap(PathBuf::from("blork.rs").into(),
1020+
"first line.\nsecond line".to_string());
1021+
cm.new_filemap(PathBuf::from("empty.rs").into(),
1022+
"".to_string());
1023+
cm.new_filemap(PathBuf::from("blork2.rs").into(),
1024+
"first line.\nsecond line".to_string());
10761025
cm
10771026
}
10781027

@@ -1125,26 +1074,10 @@ mod tests {
11251074
fn init_code_map_mbc() -> CodeMap {
11261075
let cm = CodeMap::new(FilePathMapping::empty());
11271076
// € is a three byte utf8 char.
1128-
let fm1 =
1129-
cm.new_filemap(PathBuf::from("blork.rs").into(),
1130-
"fir€st €€€€ line.\nsecond line".to_string());
1131-
let fm2 = cm.new_filemap(PathBuf::from("blork2.rs").into(),
1132-
"first line€€.\n€ second line".to_string());
1133-
1134-
fm1.next_line(BytePos(0));
1135-
fm1.next_line(BytePos(28));
1136-
fm2.next_line(fm2.start_pos);
1137-
fm2.next_line(fm2.start_pos + BytePos(20));
1138-
1139-
fm1.record_multibyte_char(BytePos(3), 3);
1140-
fm1.record_multibyte_char(BytePos(9), 3);
1141-
fm1.record_multibyte_char(BytePos(12), 3);
1142-
fm1.record_multibyte_char(BytePos(15), 3);
1143-
fm1.record_multibyte_char(BytePos(18), 3);
1144-
fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
1145-
fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
1146-
fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);
1147-
1077+
cm.new_filemap(PathBuf::from("blork.rs").into(),
1078+
"fir€st €€€€ line.\nsecond line".to_string());
1079+
cm.new_filemap(PathBuf::from("blork2.rs").into(),
1080+
"first line€€.\n€ second line".to_string());
11481081
cm
11491082
}
11501083

@@ -1196,7 +1129,7 @@ mod tests {
11961129
let cm = CodeMap::new(FilePathMapping::empty());
11971130
let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
11981131
let selection = " \n ~~\n~~~\n~~~~~ \n \n";
1199-
cm.new_filemap_and_lines(Path::new("blork.rs"), inputtext);
1132+
cm.new_filemap(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
12001133
let span = span_from_selection(inputtext, selection);
12011134

12021135
// check that we are extracting the text we thought we were extracting
@@ -1239,7 +1172,7 @@ mod tests {
12391172
let inputtext = "bbbb BB\ncc CCC\n";
12401173
let selection1 = " ~~\n \n";
12411174
let selection2 = " \n ~~~\n";
1242-
cm.new_filemap_and_lines(Path::new("blork.rs"), inputtext);
1175+
cm.new_filemap(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
12431176
let span1 = span_from_selection(inputtext, selection1);
12441177
let span2 = span_from_selection(inputtext, selection2);
12451178

src/libsyntax/ext/expand.rs

+4-2
Original file line number | Diff line number | Diff line change
@@ -1495,17 +1495,19 @@ impl<'a, 'b> Folder for InvocationCollector<'a, 'b> {
14951495

14961496
match String::from_utf8(buf) {
14971497
Ok(src) => {
1498+
let src_interned = Symbol::intern(&src);
1499+
14981500
// Add this input file to the code map to make it available as
14991501
// dependency information
1500-
self.cx.codemap().new_filemap_and_lines(&filename, &src);
1502+
self.cx.codemap().new_filemap(filename.into(), src);
15011503

15021504
let include_info = vec![
15031505
dummy_spanned(ast::NestedMetaItemKind::MetaItem(
15041506
attr::mk_name_value_item_str(Ident::from_str("file"),
15051507
dummy_spanned(file)))),
15061508
dummy_spanned(ast::NestedMetaItemKind::MetaItem(
15071509
attr::mk_name_value_item_str(Ident::from_str("contents"),
1508-
dummy_spanned(Symbol::intern(&src))))),
1510+
dummy_spanned(src_interned)))),
15091511
];
15101512

15111513
let include_ident = Ident::from_str("include");

src/libsyntax/ext/source_util.rs

+5-3
Original file line number | Diff line number | Diff line change
@@ -150,11 +150,13 @@ pub fn expand_include_str(cx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::TokenT
150150
};
151151
match String::from_utf8(bytes) {
152152
Ok(src) => {
153+
let interned_src = Symbol::intern(&src);
154+
153155
// Add this input file to the code map to make it available as
154156
// dependency information
155-
cx.codemap().new_filemap_and_lines(&file, &src);
157+
cx.codemap().new_filemap(file.into(), src);
156158

157-
base::MacEager::expr(cx.expr_str(sp, Symbol::intern(&src)))
159+
base::MacEager::expr(cx.expr_str(sp, interned_src))
158160
}
159161
Err(_) => {
160162
cx.span_err(sp,
@@ -182,7 +184,7 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::Toke
182184
Ok(..) => {
183185
// Add this input file to the code map to make it available as
184186
// dependency information, but don't enter its contents
185-
cx.codemap().new_filemap_and_lines(&file, "");
187+
cx.codemap().new_filemap(file.into(), "".to_string());
186188

187189
base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
188190
}

src/libsyntax/parse/lexer/comments.rs

+4-2
Original file line number | Diff line number | Diff line change
@@ -240,9 +240,11 @@ fn read_block_comment(rdr: &mut StringReader,
240240
let mut lines: Vec<String> = Vec::new();
241241

242242
// Count the number of chars since the start of the line by rescanning.
243-
let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos());
243+
let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos(rdr.pos));
244244
let end_src_index = rdr.src_index(rdr.pos);
245-
assert!(src_index <= end_src_index);
245+
assert!(src_index <= end_src_index,
246+
"src_index={}, end_src_index={}, line_begin_pos={}",
247+
src_index, end_src_index, rdr.filemap.line_begin_pos(rdr.pos).to_u32());
246248
let mut n = 0;
247249
while src_index < end_src_index {
248250
let c = char_at(&rdr.src, src_index);

0 commit comments

Comments
 (0)