@@ -8,15 +8,12 @@ mod tests;
8
8
pub const MAIN_SEP_STR : & str = "\\" ;
9
9
pub const MAIN_SEP : char = '\\' ;
10
10
11
- // The unsafety here stems from converting between `&OsStr` and `&[u8]`
12
- // and back. This is safe to do because (1) we only look at ASCII
13
- // contents of the encoding and (2) new &OsStr values are produced
14
- // only from ASCII-bounded slices of existing &OsStr values.
15
- fn os_str_as_u8_slice ( s : & OsStr ) -> & [ u8 ] {
16
- unsafe { mem:: transmute ( s) }
17
- }
18
- unsafe fn u8_slice_as_os_str ( s : & [ u8 ] ) -> & OsStr {
19
- mem:: transmute ( s)
11
+ // Safety: `bytes` must be a valid wtf8 encoded slice
12
+ #[ inline]
13
+ unsafe fn bytes_as_os_str ( bytes : & [ u8 ] ) -> & OsStr {
14
+ // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8,
15
+ // which is compatible with &[u8].
16
+ mem:: transmute ( bytes)
20
17
}
21
18
22
19
#[ inline]
@@ -29,79 +26,116 @@ pub fn is_verbatim_sep(b: u8) -> bool {
29
26
b == b'\\'
30
27
}
31
28
32
- // In most DOS systems, it is not possible to have more than 26 drive letters.
33
- // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
34
- pub fn is_valid_drive_letter ( disk : u8 ) -> bool {
35
- disk. is_ascii_alphabetic ( )
36
- }
37
-
38
29
pub fn parse_prefix ( path : & OsStr ) -> Option < Prefix < ' _ > > {
39
30
use Prefix :: { DeviceNS , Disk , Verbatim , VerbatimDisk , VerbatimUNC , UNC } ;
40
31
41
- let path = os_str_as_u8_slice ( path) ;
42
-
43
- // \\
44
- if let Some ( path) = path. strip_prefix ( br"\\" ) {
45
- // \\?\
46
- if let Some ( path) = path. strip_prefix ( br"?\" ) {
47
- // \\?\UNC\server\share
48
- if let Some ( path) = path. strip_prefix ( br"UNC\" ) {
49
- let ( server, share) = match get_first_two_components ( path, is_verbatim_sep) {
50
- Some ( ( server, share) ) => unsafe {
51
- ( u8_slice_as_os_str ( server) , u8_slice_as_os_str ( share) )
52
- } ,
53
- None => ( unsafe { u8_slice_as_os_str ( path) } , OsStr :: new ( "" ) ) ,
54
- } ;
55
- return Some ( VerbatimUNC ( server, share) ) ;
32
+ if let Some ( path) = strip_prefix ( path, r"\\" ) {
33
+ // \\
34
+ if let Some ( path) = strip_prefix ( path, r"?\" ) {
35
+ // \\?\
36
+ if let Some ( path) = strip_prefix ( path, r"UNC\" ) {
37
+ // \\?\UNC\server\share
38
+
39
+ let ( server, path) = parse_next_component ( path, true ) ;
40
+ let ( share, _) = parse_next_component ( path, true ) ;
41
+
42
+ Some ( VerbatimUNC ( server, share) )
56
43
} else {
57
- // \\?\path
58
- match path {
59
- // \\?\C:\path
60
- [ c, b':' , b'\\' , ..] if is_valid_drive_letter ( * c) => {
61
- return Some ( VerbatimDisk ( c. to_ascii_uppercase ( ) ) ) ;
62
- }
63
- // \\?\cat_pics
64
- _ => {
65
- let idx = path. iter ( ) . position ( |& b| b == b'\\' ) . unwrap_or ( path. len ( ) ) ;
66
- let slice = & path[ ..idx] ;
67
- return Some ( Verbatim ( unsafe { u8_slice_as_os_str ( slice) } ) ) ;
68
- }
44
+ let ( prefix, _) = parse_next_component ( path, true ) ;
45
+
46
+ // in verbatim paths only recognize an exact drive prefix
47
+ if let Some ( drive) = parse_drive_exact ( prefix) {
48
+ // \\?\C:
49
+ Some ( VerbatimDisk ( drive) )
50
+ } else {
51
+ // \\?\prefix
52
+ Some ( Verbatim ( prefix) )
69
53
}
70
54
}
71
- } else if let Some ( path) = path . strip_prefix ( b".\\") {
55
+ } else if let Some ( path) = strip_prefix ( path , r".\") {
72
56
// \\.\COM42
73
- let idx = path. iter ( ) . position ( |& b| b == b'\\' ) . unwrap_or ( path. len ( ) ) ;
74
- let slice = & path[ ..idx] ;
75
- return Some ( DeviceNS ( unsafe { u8_slice_as_os_str ( slice) } ) ) ;
76
- }
77
- match get_first_two_components ( path, is_sep_byte) {
78
- Some ( ( server, share) ) if !server. is_empty ( ) && !share. is_empty ( ) => {
57
+ let ( prefix, _) = parse_next_component ( path, false ) ;
58
+ Some ( DeviceNS ( prefix) )
59
+ } else {
60
+ let ( server, path) = parse_next_component ( path, false ) ;
61
+ let ( share, _) = parse_next_component ( path, false ) ;
62
+
63
+ if !server. is_empty ( ) && !share. is_empty ( ) {
79
64
// \\server\share
80
- return Some ( unsafe { UNC ( u8_slice_as_os_str ( server) , u8_slice_as_os_str ( share) ) } ) ;
65
+ Some ( UNC ( server, share) )
66
+ } else {
67
+ // no valid prefix beginning with "\\" recognized
68
+ None
81
69
}
82
- _ => { }
83
70
}
84
- } else if let [ c , b':' , .. ] = path {
71
+ } else if let Some ( drive ) = parse_drive ( path) {
85
72
// C:
86
- if is_valid_drive_letter ( * c) {
87
- return Some ( Disk ( c. to_ascii_uppercase ( ) ) ) ;
88
- }
73
+ Some ( Disk ( drive) )
74
+ } else {
75
+ // no prefix
76
+ None
89
77
}
90
- None
91
78
}
92
79
93
- /// Returns the first two path components with predicate `f`.
94
- ///
95
- /// The two components returned will be use by caller
96
- /// to construct `VerbatimUNC` or `UNC` Windows path prefix.
97
- ///
98
- /// Returns [`None`] if there are no separators in path.
99
- fn get_first_two_components ( path : & [ u8 ] , f : fn ( u8 ) -> bool ) -> Option < ( & [ u8 ] , & [ u8 ] ) > {
100
- let idx = path. iter ( ) . position ( |& x| f ( x) ) ?;
101
- // Panic safe
102
- // The max `idx+1` is `path.len()` and `path[path.len()..]` is a valid index.
103
- let ( first, path) = ( & path[ ..idx] , & path[ idx + 1 ..] ) ;
104
- let idx = path. iter ( ) . position ( |& x| f ( x) ) . unwrap_or ( path. len ( ) ) ;
105
- let second = & path[ ..idx] ;
106
- Some ( ( first, second) )
80
+ // Parses a drive prefix, e.g. "C:" and "C:\whatever"
81
+ fn parse_drive ( prefix : & OsStr ) -> Option < u8 > {
82
+ // In most DOS systems, it is not possible to have more than 26 drive letters.
83
+ // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
84
+ fn is_valid_drive_letter ( drive : & u8 ) -> bool {
85
+ drive. is_ascii_alphabetic ( )
86
+ }
87
+
88
+ match prefix. bytes ( ) {
89
+ [ drive, b':' , ..] if is_valid_drive_letter ( drive) => Some ( drive. to_ascii_uppercase ( ) ) ,
90
+ _ => None ,
91
+ }
92
+ }
93
+
94
+ // Parses a drive prefix exactly, e.g. "C:"
95
+ fn parse_drive_exact ( prefix : & OsStr ) -> Option < u8 > {
96
+ // only parse two bytes: the drive letter and the drive separator
97
+ if prefix. len ( ) == 2 { parse_drive ( prefix) } else { None }
98
+ }
99
+
100
+ fn strip_prefix < ' a > ( path : & ' a OsStr , prefix : & str ) -> Option < & ' a OsStr > {
101
+ // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
102
+ // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
103
+ match path. bytes ( ) . strip_prefix ( prefix. as_bytes ( ) ) {
104
+ Some ( path) => unsafe { Some ( bytes_as_os_str ( path) ) } ,
105
+ None => None ,
106
+ }
107
+ }
108
+
109
+ // Parse the next path component.
110
+ //
111
+ // Returns the next component and the rest of the path excluding the component and separator.
112
+ // Does not recognize `/` as a separator character if `verbatim` is true.
113
+ fn parse_next_component ( path : & OsStr , verbatim : bool ) -> ( & OsStr , & OsStr ) {
114
+ let separator = if verbatim { is_verbatim_sep } else { is_sep_byte } ;
115
+
116
+ match path. bytes ( ) . iter ( ) . position ( |& x| separator ( x) ) {
117
+ Some ( separator_start) => {
118
+ let mut separator_end = separator_start + 1 ;
119
+
120
+ // a series of multiple separator characters is treated as a single separator,
121
+ // except in verbatim paths
122
+ while !verbatim && separator_end < path. len ( ) && separator ( path. bytes ( ) [ separator_end] )
123
+ {
124
+ separator_end += 1 ;
125
+ }
126
+
127
+ let component = & path. bytes ( ) [ ..separator_start] ;
128
+
129
+ // Panic safe
130
+ // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
131
+ let path = & path. bytes ( ) [ separator_end..] ;
132
+
133
+ // Safety: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
134
+ // is encoded in a single byte, therefore `bytes[separator_start]` and
135
+ // `bytes[separator_end]` must be code point boundaries and thus
136
+ // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
137
+ unsafe { ( bytes_as_os_str ( component) , bytes_as_os_str ( path) ) }
138
+ }
139
+ None => ( path, OsStr :: new ( "" ) ) ,
140
+ }
107
141
}
0 commit comments