Skip to content

Commit 4fda889

Browse files
committed
Auto merge of rust-lang#115549 - saethlin:include-bytes-resilient, r=jackh726
Fall back to the unoptimized implementation in read_binary_file if File::metadata lies. Fixes rust-lang#115458. r? `@jackh726` because you approved the previous PR.
2 parents cbce15c + 5f33647 commit 4fda889

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

Diff for: compiler/rustc_span/src/source_map.rs

+31-2
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,39 @@ impl FileLoader for RealFileLoader {
127127

128128
let mut bytes = Lrc::new_uninit_slice(len as usize);
129129
let mut buf = BorrowedBuf::from(Lrc::get_mut(&mut bytes).unwrap());
130-
file.read_buf_exact(buf.unfilled())?;
130+
match file.read_buf_exact(buf.unfilled()) {
131+
Ok(()) => {}
132+
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
133+
drop(bytes);
134+
return fs::read(path).map(Vec::into);
135+
}
136+
Err(e) => return Err(e),
137+
}
131138
// SAFETY: If the read_buf_exact call returns Ok(()), then we have
132139
// read len bytes and initialized the buffer.
133-
Ok(unsafe { bytes.assume_init() })
140+
let bytes = unsafe { bytes.assume_init() };
141+
142+
// At this point, we've read all the bytes that filesystem metadata reported exist.
143+
// But we are not guaranteed to be at the end of the file, because we did not attempt to do
144+
// a read with a non-zero-sized buffer and get Ok(0).
145+
// So we do a small read into a fixed-size buffer. If the read returns no bytes then we're
146+
// already done, and we just return the Lrc we built above.
147+
// If the read returns bytes however, we just fall back to reading into a Vec then turning
148+
// that into an Lrc, losing our nice peak memory behavior. This fallback code path should
149+
// be rarely exercised.
150+
151+
let mut probe = [0u8; 32];
152+
let n = loop {
153+
match file.read(&mut probe) {
154+
Ok(0) => return Ok(bytes),
155+
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
156+
Err(e) => return Err(e),
157+
Ok(n) => break n,
158+
}
159+
};
160+
let mut bytes: Vec<u8> = bytes.iter().copied().chain(probe[..n].iter().copied()).collect();
161+
file.read_to_end(&mut bytes)?;
162+
Ok(bytes.into())
134163
}
135164
}
136165

Diff for: compiler/rustc_span/src/source_map/tests.rs

+27
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,30 @@ fn test_next_point() {
567567
assert_eq!(span.hi().0, 6);
568568
assert!(sm.span_to_snippet(span).is_err());
569569
}
570+
571+
#[cfg(target_os = "linux")]
572+
#[test]
573+
fn read_binary_file_handles_lying_stat() {
574+
// read_binary_file tries to read the contents of a file into an Lrc<[u8]> while
575+
// never having two copies of the data in memory at once. This is an optimization
576+
// to support include_bytes! with large files. But since Rust allocators are
577+
// sensitive to alignment, our implementation can't be bootstrapped off calling
578+
// std::fs::read. So we test that it returns the same bytes as std::fs::read even on
579+
// files where fs::metadata reports the wrong length.
580+
581+
// Metadata under-reports: stat always says that /proc/self/cmdline is length 0, but it isn't.
582+
let cmdline = Path::new("/proc/self/cmdline");
583+
let len = std::fs::metadata(cmdline).unwrap().len() as usize;
584+
let real = std::fs::read(cmdline).unwrap();
585+
assert!(len < real.len());
586+
let bin = RealFileLoader.read_binary_file(cmdline).unwrap();
587+
assert_eq!(&real[..], &bin[..]);
588+
589+
// Metadata over-reports: stat always says that /sys/devices/system/cpu/kernel_max is the size of a block.
590+
let kernel_max = Path::new("/sys/devices/system/cpu/kernel_max");
591+
let len = std::fs::metadata(kernel_max).unwrap().len() as usize;
592+
let real = std::fs::read(kernel_max).unwrap();
593+
assert!(len > real.len());
594+
let bin = RealFileLoader.read_binary_file(kernel_max).unwrap();
595+
assert_eq!(&real[..], &bin[..]);
596+
}

0 commit comments

Comments
 (0)