From 4ff44ff8faabb7e0545dfefa96b0d601f0f17b94 Mon Sep 17 00:00:00 2001 From: Barosl Lee Date: Mon, 17 Aug 2015 09:10:26 +0900 Subject: [PATCH 1/3] libc: Fix constants used by `libc::pathconf` `_PC_NAME_MAX` is necessary to use `libc::pathconf`. Its value is fixed to 3 currently, but actually it varies with the platform. * _PC_NAME_MAX == 3 Linux (glibc): https://sourceware.org/git/?p=glibc.git;a=blob;f=bits/confname.h;h=1c714dfbf9398b8a600f9b69426a7ad8c7e89ab4;hb=HEAD#l32 NaCl (newlib): https://chromium.googlesource.com/native_client/nacl-newlib/+/373135ec5241d09138aa56603742b94b3b64ea1d/newlib/libc/include/sys/unistd.h#430 * _PC_NAME_MAX == 4 Android (Bionic): https://github.com/android/platform_bionic/blob/7e919daeaad62515ebbbf7b06badc77625a14d90/libc/include/unistd.h#L59 FreeBSD: https://svnweb.freebsd.org/base/head/sys/sys/unistd.h?revision=239347&view=markup#l127 NetBSD: http://ftp.netbsd.org/pub/NetBSD/NetBSD-current/src/sys/sys/unistd.h OS X: http://opensource.apple.com/source/xnu/xnu-2782.10.72/bsd/sys/unistd.h This commit fixes this, and also addes the `_PC_PATH_MAX` constant needed by further commits. --- src/liblibc/lib.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/liblibc/lib.rs b/src/liblibc/lib.rs index 5cd806dc9e3f3..c4d2dc838942a 100644 --- a/src/liblibc/lib.rs +++ b/src/liblibc/lib.rs @@ -3920,6 +3920,8 @@ pub mod consts { pub const _SC_XBS5_ILP32_OFFBIG : c_int = 126; pub const _SC_XBS5_LPBIG_OFFBIG : c_int = 128; + pub const _PC_NAME_MAX: c_int = 3; + pub const _PC_PATH_MAX: c_int = 4; } #[cfg(target_os = "nacl")] pub mod sysconf { @@ -3928,6 +3930,9 @@ pub mod consts { pub static _SC_SENDMSG_MAX_SIZE : c_int = 0; pub static _SC_NPROCESSORS_ONLN : c_int = 1; pub static _SC_PAGESIZE : c_int = 2; + + pub const _PC_NAME_MAX: c_int = 3; + pub const _PC_PATH_MAX: c_int = 4; } #[cfg(target_os = "android")] @@ -3963,6 +3968,9 @@ pub mod consts { pub const _SC_STREAM_MAX : c_int = 27; pub const _SC_TZNAME_MAX : c_int = 28; pub const _SC_PAGESIZE : c_int = 39; + + pub const _PC_NAME_MAX: c_int = 4; + pub const _PC_PATH_MAX: c_int = 5; } } @@ -4433,6 +4441,9 @@ pub mod consts { pub const _SC_SEM_VALUE_MAX : c_int = 50; pub const _SC_SIGQUEUE_MAX : c_int = 51; pub const _SC_TIMER_MAX : c_int = 52; + + pub const _PC_NAME_MAX: c_int = 4; + pub const _PC_PATH_MAX: c_int = 5; } } @@ -4868,6 +4879,9 @@ pub mod consts { pub const _SC_SYNCHRONIZED_IO : c_int = 75; pub const _SC_TIMER_MAX : c_int = 93; pub const _SC_TIMERS : c_int = 94; + + pub const _PC_NAME_MAX: c_int = 4; + pub const _PC_PATH_MAX: c_int = 5; } } @@ -5379,6 +5393,9 @@ pub mod consts { pub const _SC_TRACE_SYS_MAX : c_int = 129; pub const _SC_TRACE_USER_EVENT_MAX : c_int = 130; pub const _SC_PASS_MAX : c_int = 131; + + pub const _PC_NAME_MAX: c_int = 4; + pub const _PC_PATH_MAX: c_int = 5; } } } @@ -5835,8 +5852,6 @@ pub mod funcs { use types::os::arch::posix88::{gid_t, off_t, pid_t}; use types::os::arch::posix88::{ssize_t, uid_t}; - pub const _PC_NAME_MAX: c_int = 4; - #[cfg(not(target_os = "nacl"))] extern { pub fn access(path: *const c_char, amode: c_int) -> c_int; From 7723550fdd7fe29bee9dcbd45bdef4f209a7e1f1 Mon Sep 17 00:00:00 2001 From: Barosl Lee Date: Wed, 19 Aug 2015 13:11:40 +0900 Subject: [PATCH 2/3] Reduce the reliance on `PATH_MAX` - Rewrite `std::sys::fs::readlink` not to rely on `PATH_MAX` It currently has the following problems: 1. It uses `_PC_NAME_MAX` to query the maximum length of a file path in the underlying system. However, the meaning of the constant is the maximum length of *a path component*, not a full path. The correct constant should be `_PC_PATH_MAX`. 2. `pathconf` *may* fail if the referred file does not exist. This can be problematic if the file which the symbolic link points to does not exist, but the link itself does exist. In this case, the current implementation resorts to the hard-coded value of `1024`, which is not ideal. 3. There may exist a platform where there is no limit on file path lengths in general. That's the reaon why GNU Hurd doesn't define `PATH_MAX` at all, in addition to having `pathconf` always returning `-1`. In these platforms, the content of the symbolic link can be silently truncated if the length exceeds the hard-coded limit mentioned above. 4. The value obtained by `pathconf` may be outdated at the point of actually calling `readlink`. This is inherently racy. This commit introduces a loop that gradually increases the length of the buffer passed to `readlink`, eliminating the need of `pathconf`. - Remove the arbitrary memory limit of `std::sys::fs::realpath` As per POSIX 2013, `realpath` will return a malloc'ed buffer if the second argument is a null pointer.[1] [1] http://pubs.opengroup.org/onlinepubs/9699919799/functions/realpath.html - Comment on functions that are still using `PATH_MAX` There are some functions that only work in terms of `PATH_MAX`, such as `F_GETPATH` in OS X. Comments on them for posterity. --- src/libstd/sys/unix/fs.rs | 45 ++++++++++++++++++++++++++------------- src/rt/rust_builtin.c | 6 +++++- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/libstd/sys/unix/fs.rs b/src/libstd/sys/unix/fs.rs index cbbdd223dc2b9..0eebe5af9197d 100644 --- a/src/libstd/sys/unix/fs.rs +++ b/src/libstd/sys/unix/fs.rs @@ -376,6 +376,11 @@ impl fmt::Debug for File { #[cfg(target_os = "macos")] fn get_path(fd: c_int) -> Option { + // FIXME: The use of PATH_MAX is generally not encouraged, but it + // is inevitable in this case because OS X defines `fcntl` with + // `F_GETPATH` in terms of `MAXPATHLEN`, and there are no + // alternatives. If a better method is invented, it should be used + // instead. let mut buf = vec![0;libc::PATH_MAX as usize]; let n = unsafe { libc::fcntl(fd, libc::F_GETPATH, buf.as_ptr()) }; if n == -1 { @@ -383,6 +388,7 @@ impl fmt::Debug for File { } let l = buf.iter().position(|&c| c == 0).unwrap(); buf.truncate(l as usize); + buf.shrink_to_fit(); Some(PathBuf::from(OsString::from_vec(buf))) } @@ -466,18 +472,27 @@ pub fn rmdir(p: &Path) -> io::Result<()> { pub fn readlink(p: &Path) -> io::Result { let c_path = try!(cstr(p)); let p = c_path.as_ptr(); - let mut len = unsafe { libc::pathconf(p as *mut _, libc::_PC_NAME_MAX) }; - if len < 0 { - len = 1024; // FIXME: read PATH_MAX from C ffi? - } - let mut buf: Vec = Vec::with_capacity(len as usize); - unsafe { - let n = try!(cvt({ - libc::readlink(p, buf.as_ptr() as *mut c_char, len as size_t) - })); - buf.set_len(n as usize); + + let mut buf = Vec::with_capacity(256); + + loop { + let buf_read = try!(cvt(unsafe { + libc::readlink(p, buf.as_mut_ptr() as *mut _, buf.capacity() as libc::size_t) + })) as usize; + + unsafe { buf.set_len(buf_read); } + + if buf_read != buf.capacity() { + buf.shrink_to_fit(); + + return Ok(PathBuf::from(OsString::from_vec(buf))); + } + + // Trigger the internal buffer resizing logic of `Vec` by requiring + // more space than the current capacity. The length is guaranteed to be + // the same as the capacity due to the if statement above. + buf.reserve(1); } - Ok(PathBuf::from(OsString::from_vec(buf))) } pub fn symlink(src: &Path, dst: &Path) -> io::Result<()> { @@ -514,15 +529,15 @@ pub fn lstat(p: &Path) -> io::Result { pub fn canonicalize(p: &Path) -> io::Result { let path = try!(CString::new(p.as_os_str().as_bytes())); - let mut buf = vec![0u8; 16 * 1024]; + let buf; unsafe { - let r = c::realpath(path.as_ptr(), buf.as_mut_ptr() as *mut _); + let r = c::realpath(path.as_ptr(), ptr::null_mut()); if r.is_null() { return Err(io::Error::last_os_error()) } + buf = CStr::from_ptr(r).to_bytes().to_vec(); + libc::free(r as *mut _); } - let p = buf.iter().position(|i| *i == 0).unwrap(); - buf.truncate(p); Ok(PathBuf::from(OsString::from_vec(buf))) } diff --git a/src/rt/rust_builtin.c b/src/rt/rust_builtin.c index 945057f86122e..37ce30d7066f7 100644 --- a/src/rt/rust_builtin.c +++ b/src/rt/rust_builtin.c @@ -341,7 +341,11 @@ const char * rust_current_exe() char **paths; size_t sz; int i; - char buf[2*PATH_MAX], exe[2*PATH_MAX]; + /* If `PATH_MAX` is defined on the platform, `realpath` will truncate the + * resolved path up to `PATH_MAX`. While this can make the resolution fail if + * the executable is placed in a deep path, the usage of a buffer whose + * length depends on `PATH_MAX` is still memory safe. */ + char buf[2*PATH_MAX], exe[PATH_MAX]; if (self != NULL) return self; From 6065678e627643cc3275c677408d11f48802595e Mon Sep 17 00:00:00 2001 From: Barosl Lee Date: Wed, 26 Aug 2015 21:27:32 +0900 Subject: [PATCH 3/3] Use a different buffer doubling logic for `std::sys::os::getcwd` Make `std::sys::os::getcwd` call `Vec::reserve(1)` followed by `Vec::set_len` to double the buffer. This is to align with other similar functions, such as: - `std::sys_common::io::read_to_end_uninitialized` - `std::sys::fs::readlink` Also, reduce the initial buffer size from 2048 to 512. The previous size was introduced with 4bc26ce in 2013, but it seems a bit excessive. This is probably because buffer doubling was not implemented back then. --- src/libstd/sys/unix/os.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/libstd/sys/unix/os.rs b/src/libstd/sys/unix/os.rs index 2b6b50a1a56d7..fa31ac682d40b 100644 --- a/src/libstd/sys/unix/os.rs +++ b/src/libstd/sys/unix/os.rs @@ -30,7 +30,6 @@ use sys::c; use sys::fd; use vec; -const GETCWD_BUF_BYTES: usize = 2048; const TMPBUF_SZ: usize = 128; /// Returns the platform-specific value of errno @@ -94,11 +93,9 @@ pub fn error_string(errno: i32) -> String { } pub fn getcwd() -> io::Result { - let mut buf = Vec::new(); - let mut n = GETCWD_BUF_BYTES; + let mut buf = Vec::with_capacity(512); loop { unsafe { - buf.reserve(n); let ptr = buf.as_mut_ptr() as *mut libc::c_char; if !libc::getcwd(ptr, buf.capacity() as libc::size_t).is_null() { let len = CStr::from_ptr(buf.as_ptr() as *const libc::c_char).to_bytes().len(); @@ -111,7 +108,12 @@ pub fn getcwd() -> io::Result { return Err(error); } } - n *= 2; + + // Trigger the internal buffer resizing logic of `Vec` by requiring + // more space than the current capacity. + let cap = buf.capacity(); + buf.set_len(cap); + buf.reserve(1); } } }