Skip to content

Commit 42e71bb

Browse files
committed
rustc_metadata: Preprocess search paths for better performance
Over in Zed we've noticed that loading crates for a large-ish workspace can take almost 200ms. We've pinned it down to how rustc searches for paths, as it performs a linear search over the list of candidate paths. In our case the candidate list had about 20k entries which we had to iterate over for each dependency being loaded. This commit introduces a simple FilesIndex that's just a sorted Vec under the hood. Since crates are looked up by both prefix and suffix, we perform a range search on said Vec (which constrains the search space based on prefix) and follow up with a linear scan of entries with matching suffixes. FilesIndex is also pre-filtered before any queries are performed using available target information; query prefixes/suffixes are based on the target we are compiling for, so we can remove entries that can never match up front. Overall, this commit brings down build time for us in dev scenarios by about 6%. 100ms might not seem like much, but this is a constant cost that each of our workspace crates has to pay, even when said crate is minuscule.
1 parent 251dc8a commit 42e71bb

File tree

8 files changed

+137
-76
lines changed

8 files changed

+137
-76
lines changed

Diff for: compiler/rustc_codegen_ssa/src/back/link.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1647,7 +1647,7 @@ fn get_object_file_path(sess: &Session, name: &str, self_contained: bool) -> Pat
16471647
return file_path;
16481648
}
16491649
}
1650-
for search_path in sess.target_filesearch(PathKind::Native).search_paths() {
1650+
for search_path in sess.target_filesearch().search_paths(PathKind::Native) {
16511651
let file_path = search_path.dir.join(name);
16521652
if file_path.exists() {
16531653
return file_path;

Diff for: compiler/rustc_interface/src/passes.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ fn configure_and_expand(
175175
if cfg!(windows) {
176176
old_path = env::var_os("PATH").unwrap_or(old_path);
177177
let mut new_path = Vec::from_iter(
178-
sess.host_filesearch(PathKind::All).search_paths().map(|p| p.dir.clone()),
178+
sess.host_filesearch().search_paths(PathKind::All).map(|p| p.dir.clone()),
179179
);
180180
for path in env::split_paths(&old_path) {
181181
if !new_path.contains(&path) {

Diff for: compiler/rustc_metadata/src/creader.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,8 @@ impl<'a, 'tcx> CrateLoader<'a, 'tcx> {
507507
locator.is_proc_macro = true;
508508
locator.target = &self.sess.host;
509509
locator.tuple = TargetTuple::from_tuple(config::host_tuple());
510-
locator.filesearch = self.sess.host_filesearch(path_kind);
510+
locator.filesearch = self.sess.host_filesearch();
511+
locator.path_kind = path_kind;
511512

512513
let Some(host_result) = self.load(locator)? else {
513514
return Ok(None);

Diff for: compiler/rustc_metadata/src/locator.rs

+44-40
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,10 @@ pub(crate) struct CrateLocator<'a> {
253253
extra_filename: Option<&'a str>,
254254
pub target: &'a Target,
255255
pub tuple: TargetTuple,
256-
pub filesearch: FileSearch<'a>,
256+
pub filesearch: &'a FileSearch,
257257
pub is_proc_macro: bool,
258258

259+
pub path_kind: PathKind,
259260
// Mutable in-progress state or output.
260261
crate_rejections: CrateRejections,
261262
}
@@ -339,7 +340,8 @@ impl<'a> CrateLocator<'a> {
339340
extra_filename,
340341
target: &sess.target,
341342
tuple: sess.opts.target_triple.clone(),
342-
filesearch: sess.target_filesearch(path_kind),
343+
filesearch: sess.target_filesearch(),
344+
path_kind,
343345
is_proc_macro: false,
344346
crate_rejections: CrateRejections::default(),
345347
}
@@ -407,47 +409,49 @@ impl<'a> CrateLocator<'a> {
407409
// given that `extra_filename` comes from the `-C extra-filename`
408410
// option and thus can be anything, and the incorrect match will be
409411
// handled safely in `extract_one`.
410-
for search_path in self.filesearch.search_paths() {
412+
for search_path in self.filesearch.search_paths(self.path_kind) {
411413
debug!("searching {}", search_path.dir.display());
412-
for spf in search_path.files.iter() {
413-
debug!("testing {}", spf.path.display());
414+
let spf = &search_path.files;
414415

415-
let f = &spf.file_name_str;
416-
let (hash, kind) = if let Some(f) = f.strip_prefix(rlib_prefix)
417-
&& let Some(f) = f.strip_suffix(rlib_suffix)
418-
{
419-
(f, CrateFlavor::Rlib)
420-
} else if let Some(f) = f.strip_prefix(rmeta_prefix)
421-
&& let Some(f) = f.strip_suffix(rmeta_suffix)
422-
{
423-
(f, CrateFlavor::Rmeta)
424-
} else if let Some(f) = f.strip_prefix(dylib_prefix)
425-
&& let Some(f) = f.strip_suffix(dylib_suffix.as_ref())
426-
{
427-
(f, CrateFlavor::Dylib)
428-
} else {
429-
if f.starts_with(staticlib_prefix) && f.ends_with(staticlib_suffix.as_ref()) {
430-
self.crate_rejections.via_kind.push(CrateMismatch {
431-
path: spf.path.clone(),
432-
got: "static".to_string(),
433-
});
434-
}
435-
continue;
436-
};
437-
438-
info!("lib candidate: {}", spf.path.display());
416+
let mut should_check_staticlibs = true;
417+
for (prefix, suffix, kind) in [
418+
(rlib_prefix.as_str(), rlib_suffix, CrateFlavor::Rlib),
419+
(rmeta_prefix.as_str(), rmeta_suffix, CrateFlavor::Rmeta),
420+
(dylib_prefix, dylib_suffix, CrateFlavor::Dylib),
421+
] {
422+
if prefix == staticlib_prefix && suffix == staticlib_suffix {
423+
should_check_staticlibs = false;
424+
}
425+
if let Some(matches) = spf.query(prefix, suffix) {
426+
for (hash, spf) in matches {
427+
info!("lib candidate: {}", spf.path.display());
439428

440-
let (rlibs, rmetas, dylibs) = candidates.entry(hash.to_string()).or_default();
441-
let path = try_canonicalize(&spf.path).unwrap_or_else(|_| spf.path.clone());
442-
if seen_paths.contains(&path) {
443-
continue;
444-
};
445-
seen_paths.insert(path.clone());
446-
match kind {
447-
CrateFlavor::Rlib => rlibs.insert(path, search_path.kind),
448-
CrateFlavor::Rmeta => rmetas.insert(path, search_path.kind),
449-
CrateFlavor::Dylib => dylibs.insert(path, search_path.kind),
450-
};
429+
let (rlibs, rmetas, dylibs) =
430+
candidates.entry(hash.to_string()).or_default();
431+
let path =
432+
try_canonicalize(&spf.path).unwrap_or_else(|_| spf.path.to_path_buf());
433+
if seen_paths.contains(&path) {
434+
continue;
435+
};
436+
seen_paths.insert(path.clone());
437+
match kind {
438+
CrateFlavor::Rlib => rlibs.insert(path, search_path.kind),
439+
CrateFlavor::Rmeta => rmetas.insert(path, search_path.kind),
440+
CrateFlavor::Dylib => dylibs.insert(path, search_path.kind),
441+
};
442+
}
443+
}
444+
}
445+
if let Some(static_matches) = should_check_staticlibs
446+
.then(|| spf.query(staticlib_prefix, staticlib_suffix))
447+
.flatten()
448+
{
449+
for (_, spf) in static_matches {
450+
self.crate_rejections.via_kind.push(CrateMismatch {
451+
path: spf.path.to_path_buf(),
452+
got: "static".to_string(),
453+
});
454+
}
451455
}
452456
}
453457

Diff for: compiler/rustc_metadata/src/native_libs.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ pub fn walk_native_lib_search_dirs<R>(
2828
mut f: impl FnMut(&Path, bool /*is_framework*/) -> ControlFlow<R>,
2929
) -> ControlFlow<R> {
3030
// Library search paths explicitly supplied by user (`-L` on the command line).
31-
for search_path in sess.target_filesearch(PathKind::Native).cli_search_paths() {
31+
for search_path in sess.target_filesearch().cli_search_paths(PathKind::Native) {
3232
f(&search_path.dir, false)?;
3333
}
34-
for search_path in sess.target_filesearch(PathKind::Framework).cli_search_paths() {
34+
for search_path in sess.target_filesearch().cli_search_paths(PathKind::Framework) {
3535
// Frameworks are looked up strictly in framework-specific paths.
3636
if search_path.kind != PathKind::All {
3737
f(&search_path.dir, true)?;

Diff for: compiler/rustc_session/src/filesearch.rs

+23-16
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,44 @@ use std::path::{Path, PathBuf};
44
use std::{env, fs};
55

66
use rustc_fs_util::{fix_windows_verbatim_for_gcc, try_canonicalize};
7+
use rustc_target::spec::Target;
78
use smallvec::{SmallVec, smallvec};
89

910
use crate::search_paths::{PathKind, SearchPath};
1011

1112
#[derive(Clone)]
12-
pub struct FileSearch<'a> {
13-
cli_search_paths: &'a [SearchPath],
14-
tlib_path: &'a SearchPath,
15-
kind: PathKind,
13+
pub struct FileSearch {
14+
cli_search_paths: Vec<SearchPath>,
15+
tlib_path: SearchPath,
1616
}
1717

18-
impl<'a> FileSearch<'a> {
19-
pub fn cli_search_paths(&self) -> impl Iterator<Item = &'a SearchPath> {
20-
let kind = self.kind;
18+
impl FileSearch {
19+
pub fn cli_search_paths<'b>(&'b self, kind: PathKind) -> impl Iterator<Item = &'b SearchPath> {
2120
self.cli_search_paths.iter().filter(move |sp| sp.kind.matches(kind))
2221
}
2322

24-
pub fn search_paths(&self) -> impl Iterator<Item = &'a SearchPath> {
25-
let kind = self.kind;
23+
pub fn search_paths<'b>(&'b self, kind: PathKind) -> impl Iterator<Item = &'b SearchPath> {
2624
self.cli_search_paths
2725
.iter()
2826
.filter(move |sp| sp.kind.matches(kind))
29-
.chain(std::iter::once(self.tlib_path))
27+
.chain(std::iter::once(&self.tlib_path))
3028
}
3129

32-
pub fn new(
33-
cli_search_paths: &'a [SearchPath],
34-
tlib_path: &'a SearchPath,
35-
kind: PathKind,
36-
) -> FileSearch<'a> {
37-
FileSearch { cli_search_paths, tlib_path, kind }
30+
pub fn new(cli_search_paths: &[SearchPath], tlib_path: &SearchPath, target: &Target) -> Self {
31+
let this = FileSearch {
32+
cli_search_paths: cli_search_paths.to_owned(),
33+
tlib_path: tlib_path.clone(),
34+
};
35+
this.refine(&["lib", &target.staticlib_prefix, &target.dll_prefix])
36+
}
37+
// Produce a new file search from this search that has a smaller set of candidates.
38+
fn refine(mut self, allowed_prefixes: &[&str]) -> FileSearch {
39+
self.cli_search_paths
40+
.iter_mut()
41+
.for_each(|search_paths| search_paths.files.retain(allowed_prefixes));
42+
self.tlib_path.files.retain(allowed_prefixes);
43+
44+
self
3845
}
3946
}
4047

Diff for: compiler/rustc_session/src/search_paths.rs

+50-9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::path::{Path, PathBuf};
2+
use std::sync::Arc;
23

34
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
45
use rustc_target::spec::TargetTuple;
@@ -10,9 +11,44 @@ use crate::filesearch::make_target_lib_path;
1011
pub struct SearchPath {
1112
pub kind: PathKind,
1213
pub dir: PathBuf,
13-
pub files: Vec<SearchPathFile>,
14+
pub files: FilesIndex,
1415
}
1516

17+
/// [FilesIndex] contains paths that can be efficiently looked up with (prefix, suffix) pairs.
18+
#[derive(Clone, Debug)]
19+
pub struct FilesIndex(Vec<(Arc<str>, SearchPathFile)>);
20+
21+
impl FilesIndex {
22+
/// Look up [SearchPathFile] by (prefix, suffix) pair.
23+
pub fn query<'this, 'prefix, 'suffix>(
24+
&'this self,
25+
prefix: &'prefix str,
26+
suffix: &'suffix str,
27+
) -> Option<impl Iterator<Item = (String, &'this SearchPathFile)> + use<'this, 'prefix, 'suffix>>
28+
{
29+
let start = self.0.partition_point(|(k, _)| **k < *prefix);
30+
if start == self.0.len() {
31+
return None;
32+
}
33+
let end = self.0[start..].partition_point(|(k, _)| k.starts_with(prefix));
34+
let prefixed_items = &self.0[start..][..end];
35+
36+
let ret = prefixed_items.into_iter().filter_map(move |(k, v)| {
37+
k.ends_with(suffix).then(|| {
38+
(
39+
String::from(
40+
&v.file_name_str[prefix.len()..v.file_name_str.len() - suffix.len()],
41+
),
42+
v,
43+
)
44+
})
45+
});
46+
Some(ret)
47+
}
48+
pub fn retain(&mut self, prefixes: &[&str]) {
49+
self.0.retain(|(k, _)| prefixes.iter().any(|prefix| k.starts_with(prefix)));
50+
}
51+
}
1652
/// The obvious implementation of `SearchPath::files` is a `Vec<PathBuf>`. But
1753
/// it is searched repeatedly by `find_library_crate`, and the searches involve
1854
/// checking the prefix and suffix of the filename of each `PathBuf`. This is
@@ -26,8 +62,8 @@ pub struct SearchPath {
2662
/// UTF-8, and so a non-UTF-8 filename couldn't be one we're looking for.)
2763
#[derive(Clone, Debug)]
2864
pub struct SearchPathFile {
29-
pub path: PathBuf,
30-
pub file_name_str: String,
65+
pub path: Arc<Path>,
66+
pub file_name_str: Arc<str>,
3167
}
3268

3369
#[derive(PartialEq, Clone, Copy, Debug, Hash, Eq, Encodable, Decodable, HashStable_Generic)]
@@ -98,20 +134,25 @@ impl SearchPath {
98134

99135
pub fn new(kind: PathKind, dir: PathBuf) -> Self {
100136
// Get the files within the directory.
101-
let files = match std::fs::read_dir(&dir) {
137+
let mut files = match std::fs::read_dir(&dir) {
102138
Ok(files) => files
103139
.filter_map(|e| {
104140
e.ok().and_then(|e| {
105-
e.file_name().to_str().map(|s| SearchPathFile {
106-
path: e.path(),
107-
file_name_str: s.to_string(),
141+
e.file_name().to_str().map(|s| {
142+
let file_name_str: Arc<str> = s.into();
143+
(Arc::clone(&file_name_str), SearchPathFile {
144+
path: e.path().into(),
145+
file_name_str,
146+
})
108147
})
109148
})
110149
})
111150
.collect::<Vec<_>>(),
112-
Err(..) => vec![],
113-
};
114151

152+
Err(..) => Default::default(),
153+
};
154+
files.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
155+
let files = FilesIndex(files);
115156
SearchPath { kind, dir, files }
116157
}
117158
}

Diff for: compiler/rustc_session/src/session.rs

+14-6
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ use crate::config::{
4444
InstrumentCoverage, OptLevel, OutFileName, OutputType, RemapPathScopeComponents,
4545
SwitchWithOptPath,
4646
};
47+
use crate::filesearch::FileSearch;
4748
use crate::parse::{ParseSess, add_feature_diagnostics};
48-
use crate::search_paths::{PathKind, SearchPath};
49+
use crate::search_paths::SearchPath;
4950
use crate::{errors, filesearch, lint};
5051

5152
struct OptimizationFuel {
@@ -218,6 +219,9 @@ pub struct Session {
218219
/// This is mainly useful for other tools that reads that debuginfo to figure out
219220
/// how to call the compiler with the same arguments.
220221
pub expanded_args: Vec<String>,
222+
223+
target_filesearch: FileSearch,
224+
host_filesearch: FileSearch,
221225
}
222226

223227
#[derive(PartialEq, Eq, PartialOrd, Ord)]
@@ -443,11 +447,11 @@ impl Session {
443447
format!("__rustc_proc_macro_decls_{:08x}__", stable_crate_id.as_u64())
444448
}
445449

446-
pub fn target_filesearch(&self, kind: PathKind) -> filesearch::FileSearch<'_> {
447-
filesearch::FileSearch::new(&self.opts.search_paths, &self.target_tlib_path, kind)
450+
pub fn target_filesearch(&self) -> &filesearch::FileSearch {
451+
&self.target_filesearch
448452
}
449-
pub fn host_filesearch(&self, kind: PathKind) -> filesearch::FileSearch<'_> {
450-
filesearch::FileSearch::new(&self.opts.search_paths, &self.host_tlib_path, kind)
453+
pub fn host_filesearch(&self) -> &filesearch::FileSearch {
454+
&self.host_filesearch
451455
}
452456

453457
/// Returns a list of directories where target-specific tool binaries are located. Some fallback
@@ -1111,7 +1115,9 @@ pub fn build_session(
11111115
});
11121116

11131117
let asm_arch = if target.allow_asm { InlineAsmArch::from_str(&target.arch).ok() } else { None };
1114-
1118+
let target_filesearch =
1119+
filesearch::FileSearch::new(&sopts.search_paths, &target_tlib_path, &target);
1120+
let host_filesearch = filesearch::FileSearch::new(&sopts.search_paths, &host_tlib_path, &host);
11151121
let sess = Session {
11161122
target,
11171123
host,
@@ -1138,6 +1144,8 @@ pub fn build_session(
11381144
cfg_version,
11391145
using_internal_features,
11401146
expanded_args,
1147+
target_filesearch,
1148+
host_filesearch,
11411149
};
11421150

11431151
validate_commandline_args_with_session_available(&sess);

0 commit comments

Comments
 (0)