Skip to content

Commit 8877b81

Browse files
aturon authored and alexcrichton committed
Add os::join_paths, make setenv non-utf8 capable
This commit changes `os` in three ways:

* It adds a `join_paths` function that is the converse to `split_paths`, easing manipulation of the `PATH` environment variable according to platform conventions.

* **Breaking change**: It changes `split_paths` to no longer drop empty paths, since they are meaningful to some shells (where they are synonymous with the current working directory).

* It changes `setenv` to take a `BytesContainer` rather than a `&str` value, since environment variables may have non-utf8 values on some platforms. Since `&str` is a `BytesContainer`, this is *not* a breaking change.

Along the way, it also refactors the `split_paths` function so that `cfg` switches are applied internally (and the function header is given only once). This fixes a bug: the doc comment had an example for only one platform.

[breaking-change]
1 parent 748b947 commit 8877b81

File tree

1 file changed

+147
-60
lines changed

1 file changed

+147
-60
lines changed

src/libstd/os.rs

Lines changed: 147 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ use path::{Path, GenericPath, BytesContainer};
4545
use ptr::RawPtr;
4646
use ptr;
4747
use result::{Err, Ok, Result};
48-
use slice::{Vector, ImmutableVector, MutableVector};
48+
use slice::{Vector, ImmutableVector, MutableVector, ImmutableEqVector};
4949
use str::{Str, StrSlice, StrAllocating};
5050
use str;
5151
use string::String;
@@ -398,9 +398,9 @@ pub fn getenv_as_bytes(n: &str) -> Option<Vec<u8>> {
398398
/// None => println!("{} is not defined in the environment.", key)
399399
/// }
400400
/// ```
401-
pub fn setenv(n: &str, v: &str) {
401+
pub fn setenv<T: BytesContainer>(n: &str, v: T) {
402402
#[cfg(unix)]
403-
fn _setenv(n: &str, v: &str) {
403+
fn _setenv(n: &str, v: &[u8]) {
404404
unsafe {
405405
with_env_lock(|| {
406406
n.with_c_str(|nbuf| {
@@ -413,18 +413,20 @@ pub fn setenv(n: &str, v: &str) {
413413
}
414414

415415
#[cfg(windows)]
416-
fn _setenv(n: &str, v: &str) {
416+
fn _setenv(n: &str, v: &[u8]) {
417417
let n: Vec<u16> = n.utf16_units().collect();
418418
let n = n.append_one(0);
419-
let v: Vec<u16> = v.utf16_units().collect();
419+
let v: Vec<u16> = str::from_utf8(v).unwrap().utf16_units().collect();
420420
let v = v.append_one(0);
421+
421422
unsafe {
422423
with_env_lock(|| {
423424
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
424425
})
425426
}
426427
}
427-
_setenv(n, v)
428+
429+
_setenv(n, v.container_as_bytes())
428430
}
429431

430432
/// Remove a variable from the environment entirely.
@@ -453,17 +455,15 @@ pub fn unsetenv(n: &str) {
453455
_unsetenv(n);
454456
}
455457

456-
#[cfg(unix)]
457-
/// Parse a string or vector according to the platform's conventions
458-
/// for the `PATH` environment variable and return a Vec<Path>.
459-
/// Drops empty paths.
458+
/// Parses input according to platform conventions for the `PATH`
459+
/// environment variable.
460460
///
461461
/// # Example
462462
/// ```rust
463463
/// use std::os;
464464
///
465465
/// let key = "PATH";
466-
/// match os::getenv(key) {
466+
/// match os::getenv_as_bytes(key) {
467467
/// Some(paths) => {
468468
/// for path in os::split_paths(paths).iter() {
469469
/// println!("'{}'", path.display());
@@ -473,57 +473,112 @@ pub fn unsetenv(n: &str) {
473473
/// }
474474
/// ```
475475
pub fn split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
476-
unparsed.container_as_bytes()
477-
.split(|b| *b == ':' as u8)
478-
.filter(|s| s.len() > 0)
479-
.map(Path::new)
480-
.collect()
481-
}
476+
#[cfg(unix)]
477+
fn _split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
478+
unparsed.container_as_bytes()
479+
.split(|b| *b == b':')
480+
.map(Path::new)
481+
.collect()
482+
}
482483

483-
#[cfg(windows)]
484-
/// Parse a string or vector according to the platform's conventions
485-
/// for the `PATH` environment variable. Drops empty paths.
486-
pub fn split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
487-
// On Windows, the PATH environment variable is semicolon separated. Double
488-
// quotes are used as a way of introducing literal semicolons (since
489-
// c:\some;dir is a valid Windows path). Double quotes are not themselves
490-
// permitted in path names, so there is no way to escape a double quote.
491-
// Quoted regions can appear in arbitrary locations, so
492-
//
493-
// c:\foo;c:\som"e;di"r;c:\bar
494-
//
495-
// Should parse as [c:\foo, c:\some;dir, c:\bar].
496-
//
497-
// (The above is based on testing; there is no clear reference available
498-
// for the grammar.)
499-
500-
let mut parsed = Vec::new();
501-
let mut in_progress = Vec::new();
502-
let mut in_quote = false;
503-
504-
for b in unparsed.container_as_bytes().iter() {
505-
match *b as char {
506-
';' if !in_quote => {
507-
// ignore zero-length path strings
508-
if in_progress.len() > 0 {
484+
#[cfg(windows)]
485+
pub fn _split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
486+
// On Windows, the PATH environment variable is semicolon separated. Double
487+
// quotes are used as a way of introducing literal semicolons (since
488+
// c:\some;dir is a valid Windows path). Double quotes are not themselves
489+
// permitted in path names, so there is no way to escape a double quote.
490+
// Quoted regions can appear in arbitrary locations, so
491+
//
492+
// c:\foo;c:\som"e;di"r;c:\bar
493+
//
494+
// Should parse as [c:\foo, c:\some;dir, c:\bar].
495+
//
496+
// (The above is based on testing; there is no clear reference available
497+
// for the grammar.)
498+
499+
let mut parsed = Vec::new();
500+
let mut in_progress = Vec::new();
501+
let mut in_quote = false;
502+
503+
for b in unparsed.container_as_bytes().iter() {
504+
match *b {
505+
b';' if !in_quote => {
509506
parsed.push(Path::new(in_progress.as_slice()));
507+
in_progress.truncate(0)
508+
}
509+
b'"' => {
510+
in_quote = !in_quote;
511+
}
512+
_ => {
513+
in_progress.push(*b);
510514
}
511-
in_progress.truncate(0)
512-
}
513-
'\"' => {
514-
in_quote = !in_quote;
515515
}
516-
_ => {
517-
in_progress.push(*b);
516+
}
517+
parsed.push(Path::new(in_progress));
518+
parsed
519+
}
520+
521+
_split_paths(unparsed)
522+
}
523+
524+
/// Joins a collection of `Path`s appropriately for the `PATH`
525+
/// environment variable.
526+
///
527+
/// Returns a `Vec<u8>` on success, since `Path`s are not utf-8
528+
/// encoded on all platforms.
529+
///
530+
/// Returns an `Err` (containing an error message) if one of the input
531+
/// `Path`s contains an invalid character for constructing the `PATH`
532+
/// variable (a double quote on Windows or a colon on Unix).
533+
///
534+
/// # Example
535+
///
536+
/// ```rust
537+
/// use std::os;
538+
/// use std::path::Path;
539+
///
540+
/// let key = "PATH";
541+
/// let mut paths = os::getenv_as_bytes(key).map_or(Vec::new(), os::split_paths);
542+
/// paths.push(Path::new("/home/xyz/bin"));
543+
/// os::setenv(key, os::join_paths(paths.as_slice()).unwrap());
544+
/// ```
545+
pub fn join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
546+
#[cfg(windows)]
547+
fn _join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
548+
let mut joined = Vec::new();
549+
let sep = b';';
550+
551+
for (i, path) in paths.iter().map(|p| p.container_as_bytes()).enumerate() {
552+
if i > 0 { joined.push(sep) }
553+
if path.contains(&b'"') {
554+
return Err("path segment contains `\"`");
555+
} else if path.contains(&sep) {
556+
joined.push(b'"');
557+
joined.push_all(path);
558+
joined.push(b'"');
559+
} else {
560+
joined.push_all(path);
518561
}
519562
}
563+
564+
Ok(joined)
520565
}
521566

522-
if in_progress.len() > 0 {
523-
parsed.push(Path::new(in_progress));
567+
#[cfg(unix)]
568+
fn _join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
569+
let mut joined = Vec::new();
570+
let sep = b':';
571+
572+
for (i, path) in paths.iter().map(|p| p.container_as_bytes()).enumerate() {
573+
if i > 0 { joined.push(sep) }
574+
if path.contains(&sep) { return Err("path segment contains separator `:`") }
575+
joined.push_all(path);
576+
}
577+
578+
Ok(joined)
524579
}
525580

526-
parsed
581+
_join_paths(paths)
527582
}
528583

529584
/// A low-level OS in-memory pipe.
@@ -1767,7 +1822,7 @@ mod tests {
17671822
use c_str::ToCStr;
17681823
use option;
17691824
use os::{env, getcwd, getenv, make_absolute};
1770-
use os::{split_paths, setenv, unsetenv};
1825+
use os::{split_paths, join_paths, setenv, unsetenv};
17711826
use os;
17721827
use rand::Rng;
17731828
use rand;
@@ -2032,11 +2087,11 @@ mod tests {
20322087
parsed.iter().map(|s| Path::new(*s)).collect()
20332088
}
20342089

2035-
assert!(check_parse("", []));
2036-
assert!(check_parse(r#""""#, []));
2037-
assert!(check_parse(";;", []));
2090+
assert!(check_parse("", [""]));
2091+
assert!(check_parse(r#""""#, [""]));
2092+
assert!(check_parse(";;", ["", "", ""]));
20382093
assert!(check_parse(r"c:\", [r"c:\"]));
2039-
assert!(check_parse(r"c:\;", [r"c:\"]));
2094+
assert!(check_parse(r"c:\;", [r"c:\", ""]));
20402095
assert!(check_parse(r"c:\;c:\Program Files\",
20412096
[r"c:\", r"c:\Program Files\"]));
20422097
assert!(check_parse(r#"c:\;c:\"foo"\"#, [r"c:\", r"c:\foo\"]));
@@ -2052,12 +2107,44 @@ mod tests {
20522107
parsed.iter().map(|s| Path::new(*s)).collect()
20532108
}
20542109

2055-
assert!(check_parse("", []));
2056-
assert!(check_parse("::", []));
2110+
assert!(check_parse("", [""]));
2111+
assert!(check_parse("::", ["", "", ""]));
20572112
assert!(check_parse("/", ["/"]));
2058-
assert!(check_parse("/:", ["/"]));
2113+
assert!(check_parse("/:", ["/", ""]));
20592114
assert!(check_parse("/:/usr/local", ["/", "/usr/local"]));
20602115
}
20612116

2117+
#[test]
2118+
#[cfg(unix)]
2119+
fn join_paths_unix() {
2120+
fn test_eq(input: &[&str], output: &str) -> bool {
2121+
join_paths(input).unwrap().as_slice() == output.as_bytes()
2122+
}
2123+
2124+
assert!(test_eq([], ""));
2125+
assert!(test_eq(["/bin", "/usr/bin", "/usr/local/bin"],
2126+
"/bin:/usr/bin:/usr/local/bin"));
2127+
assert!(test_eq(["", "/bin", "", "", "/usr/bin", ""],
2128+
":/bin:::/usr/bin:"));
2129+
assert!(join_paths(["/te:st"]).is_err());
2130+
}
2131+
2132+
#[test]
2133+
#[cfg(windows)]
2134+
fn join_paths_windows() {
2135+
fn test_eq(input: &[&str], output: &str) -> bool {
2136+
join_paths(input).unwrap().as_slice() == output.as_bytes()
2137+
}
2138+
2139+
assert!(test_eq([], ""));
2140+
assert!(test_eq([r"c:\windows", r"c:\"],
2141+
r"c:\windows;c:\"));
2142+
assert!(test_eq(["", r"c:\windows", "", "", r"c:\", ""],
2143+
r";c:\windows;;;c:\;"));
2144+
assert!(test_eq([r"c:\te;st", r"c:\"],
2145+
r#""c:\te;st";c:\"#));
2146+
assert!(join_paths([r#"c:\te"st"#]).is_err());
2147+
}
2148+
20622149
// More recursive_mkdir tests are in extra::tempfile
20632150
}

0 commit comments

Comments
 (0)