11
11
#![ allow( dead_code) ] // runtime init functions not used during testing
12
12
13
13
use os:: windows:: prelude:: * ;
14
+ use sys:: windows:: os:: current_exe;
14
15
use sys:: c;
15
- use slice;
16
- use ops:: Range ;
17
16
use ffi:: OsString ;
18
- use libc:: { c_int, c_void} ;
19
17
use fmt;
18
+ use collections:: VecDeque ;
19
+ use core:: iter;
20
+ use slice;
21
+ use path:: PathBuf ;
20
22
21
23
/// Runtime-initialization hook for argument handling.
///
/// The body is empty, so `_argc` and `_argv` are ignored and calling this does nothing.
pub unsafe fn init ( _argc : isize , _argv : * const * const u8 ) { }
22
24
23
25
/// Counterpart to `init`; the body is empty, so calling this does nothing.
pub unsafe fn cleanup ( ) { }
24
26
25
27
/// Returns the `Args` iterator over the command line of the current process.
///
/// Removed version: asked `CommandLineToArgvW` to split the string returned by
/// `GetCommandLineW` and kept the resulting pointer array plus an index range.
/// Added version: splits the string returned by `GetCommandLineW` with
/// `parse_lp_cmd_line` and stores the owned results in `parsed_args_list`.
pub fn args ( ) -> Args {
26
28
unsafe {
27
- let mut nArgs: c_int = 0 ;
28
- let lpCmdLine = c:: GetCommandLineW ( ) ;
29
- let szArgList = c:: CommandLineToArgvW ( lpCmdLine, & mut nArgs) ;
30
-
31
- // szArgList can be NULL if CommandLineToArgvW failed,
32
- // but in that case nArgs is 0, so the empty range means
33
- // the null pointer is never actually read.
34
- Args { cur : szArgList, range : 0 ..( nArgs as isize ) }
29
+ let lp_cmd_line = c:: GetCommandLineW ( ) ;
30
+ let parsed_args_list = parse_lp_cmd_line (
31
+ lp_cmd_line as * const u16 ,
32
// Fallback name handed to the parser: the path from `current_exe` converted to an
// OsString, or an empty OsString when `current_exe` returns an error.
+ || current_exe ( ) . map ( PathBuf :: into_os_string) . unwrap_or_else ( |_| OsString :: new ( ) ) ) ;
33
+
34
+ Args { parsed_args_list : parsed_args_list }
35
35
}
36
36
}
37
37
38
+ /// Implements the Windows command-line argument parsing algorithm, described at
39
+ /// <https://docs.microsoft.com/en-us/previous-versions//17w5ykft(v=vs.85)>.
40
+ ///
41
+ /// Windows includes a function to do this in shell32.dll,
42
+ /// but linking with that DLL causes the process to be registered as a GUI application.
43
+ /// GUI applications add a bunch of overhead, even if no windows are drawn. See
44
+ /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
///
/// `lp_cmd_line` is read one `u16` code unit at a time until a 0 unit is reached.
/// The first argument (the executable name) is split off by its own rules and gets no
/// backslash processing; everything after it goes through the backslash and quote
/// state machine in the main loop. The arguments are returned in command-line order.
///
/// If `lp_cmd_line` is null or its first unit is 0, the result holds exactly one
/// element: the value returned by `exe_name`. `exe_name` is not called otherwise.
///
/// # Safety
///
/// `lp_cmd_line` must either be null or point to a readable sequence of `u16` that
/// ends with a 0 unit, because the function keeps reading until it reaches a 0 unit.
45
+ unsafe fn parse_lp_cmd_line < F : Fn ( ) -> OsString > ( lp_cmd_line : * const u16 , exe_name : F )
46
+ -> VecDeque < OsString > {
47
+ const BACKSLASH : u16 = '\\' as u16 ;
48
+ const QUOTE : u16 = '"' as u16 ;
49
+ const TAB : u16 = '\t' as u16 ;
50
+ const SPACE : u16 = ' ' as u16 ;
51
// in_quotes: the scan is currently inside a quoted region.
+ let mut in_quotes = false ;
52
// was_in_quotes: the previous unit was a quote that closed a quoted region.
// It is set where in_quotes is toggled and cleared by every other kind of unit.
+ let mut was_in_quotes = false ;
53
// Number of consecutive backslashes seen and not yet written to `cur`.
+ let mut backslash_count: usize = 0 ;
54
+ let mut ret_val = VecDeque :: new ( ) ;
55
// Code units of the argument currently being assembled.
+ let mut cur = Vec :: new ( ) ;
56
+ if lp_cmd_line. is_null ( ) || * lp_cmd_line == 0 {
57
+ ret_val. push_back ( exe_name ( ) ) ;
58
+ return ret_val;
59
+ }
60
+ let mut i = 0 ;
61
+ // The executable name at the beginning is special.
62
+ match * lp_cmd_line {
63
+ // The executable name ends at the next quote mark,
64
+ // no matter what.
65
+ QUOTE => {
66
+ loop {
67
+ i += 1 ;
68
+ if * lp_cmd_line. offset ( i) == 0 {
69
+ ret_val. push_back ( OsString :: from_wide (
70
+ slice:: from_raw_parts ( lp_cmd_line. offset ( 1 ) , i as usize - 1 )
71
+ ) ) ;
72
+ return ret_val;
73
+ }
74
+ if * lp_cmd_line. offset ( i) == QUOTE {
75
+ break ;
76
+ }
77
+ }
78
+ ret_val. push_back ( OsString :: from_wide (
79
+ slice:: from_raw_parts ( lp_cmd_line. offset ( 1 ) , i as usize - 1 )
80
+ ) ) ;
81
+ i += 1 ;
82
+ }
83
+ // Implement quirk: when they say whitespace here,
84
+ // they include the entire ASCII control plane:
85
+ // "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
86
+ // will consider the first argument to be an empty string. Excess whitespace at the
87
+ // end of lpCmdLine is ignored."
88
+ 0 ...SPACE => {
89
+ ret_val. push_back ( OsString :: new ( ) ) ;
90
+ i += 1 ;
91
+ } ,
92
+ // The executable name ends at the next whitespace or control character
93
+ // (any code unit in the range 0...SPACE), no matter what.
94
+ _ => {
95
+ loop {
96
+ i += 1 ;
97
+ if * lp_cmd_line. offset ( i) == 0 {
98
+ ret_val. push_back ( OsString :: from_wide (
99
+ slice:: from_raw_parts ( lp_cmd_line, i as usize )
100
+ ) ) ;
101
+ return ret_val;
102
+ }
103
+ if let 0 ...SPACE = * lp_cmd_line. offset ( i) {
104
+ break ;
105
+ }
106
+ }
107
+ ret_val. push_back ( OsString :: from_wide (
108
+ slice:: from_raw_parts ( lp_cmd_line, i as usize )
109
+ ) ) ;
110
+ i += 1 ;
111
+ }
112
+ }
// Main loop: handles every argument after the executable name, one code unit per pass.
113
+ loop {
114
+ let c = * lp_cmd_line. offset ( i) ;
115
+ match c {
116
+ // Backslashes are only counted here; how many end up in the output
117
+ BACKSLASH => {
118
+ backslash_count += 1 ;
119
+ was_in_quotes = false ;
120
+ } ,
121
// Quote after an even number of backslashes: write half of the backslashes, then
// either write a literal quote (when this quote directly follows a closing quote)
// or toggle in_quotes.
+ QUOTE if backslash_count % 2 == 0 => {
122
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
123
+ backslash_count = 0 ;
124
+ if was_in_quotes {
125
+ cur. push ( '"' as u16 ) ;
126
+ was_in_quotes = false ;
127
+ } else {
128
+ was_in_quotes = in_quotes;
129
+ in_quotes = !in_quotes;
130
+ }
131
+ }
132
// Quote after an odd number of backslashes: write half of the backslashes
// (rounded down) followed by a literal quote; in_quotes is left unchanged.
+ QUOTE if backslash_count % 2 != 0 => {
133
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
134
+ backslash_count = 0 ;
135
+ was_in_quotes = false ;
136
+ cur. push ( b'"' as u16 ) ;
137
+ }
138
// Space or tab outside quotes ends the current argument. Pending backslashes are
// written literally. An empty argument is emitted only right after a closing quote.
+ SPACE | TAB if !in_quotes => {
139
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
140
+ if !cur. is_empty ( ) || was_in_quotes {
141
+ ret_val. push_back ( OsString :: from_wide ( & cur[ ..] ) ) ;
142
+ cur. truncate ( 0 ) ;
143
+ }
144
+ backslash_count = 0 ;
145
+ was_in_quotes = false ;
146
+ }
147
// Terminating 0 unit: flush pending backslashes and the last argument, then stop.
+ 0x00 => {
148
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
149
+ // include empty quoted strings at the end of the arguments list
150
+ if !cur. is_empty ( ) || was_in_quotes || in_quotes {
151
+ ret_val. push_back ( OsString :: from_wide ( & cur[ ..] ) ) ;
152
+ }
153
+ break ;
154
+ }
155
// Any other unit, including space or tab inside quotes: pending backslashes are
// written literally, then the unit itself is appended.
+ _ => {
156
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
157
+ backslash_count = 0 ;
158
+ was_in_quotes = false ;
159
+ cur. push ( c) ;
160
+ }
161
+ }
162
+ i += 1 ;
163
+ }
164
+ ret_val
165
+ }
166
+
38
167
/// Iterator over command-line arguments, as returned by `args`.
///
/// Removed fields: a raw pointer array (`cur`) plus the index range still to be yielded.
/// Added field: the arguments not yet yielded, held as owned `OsString` values; the
/// iterator impls pop from the front and from the back of this queue.
pub struct Args {
39
- range : Range < isize > ,
40
- cur : * mut * mut u16 ,
168
+ parsed_args_list : VecDeque < OsString > ,
41
169
}
42
170
43
171
pub struct ArgsInnerDebug < ' a > {
@@ -48,14 +176,13 @@ impl<'a> fmt::Debug for ArgsInnerDebug<'a> {
48
176
fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
49
177
f. write_str ( "[" ) ?;
50
178
let mut first = true ;
51
- for i in self . args . range . clone ( ) {
179
+ for i in & self . args . parsed_args_list {
52
180
if !first {
53
181
f. write_str ( ", " ) ?;
54
182
}
55
183
first = false ;
56
184
57
- // Here we do allocation which could be avoided.
58
- fmt:: Debug :: fmt ( & unsafe { os_string_from_ptr ( * self . args . cur . offset ( i) ) } , f) ?;
185
+ fmt:: Debug :: fmt ( i, f) ?;
59
186
}
60
187
f. write_str ( "]" ) ?;
61
188
Ok ( ( ) )
@@ -70,38 +197,79 @@ impl Args {
70
197
}
71
198
}
72
199
73
// Removed helper: builds an OsString from the `u16` units at `ptr`, stopping before
// the first 0 unit. It reads through `ptr` until it finds that 0 unit.
- unsafe fn os_string_from_ptr ( ptr : * mut u16 ) -> OsString {
74
- let mut len = 0 ;
75
- while * ptr. offset ( len) != 0 { len += 1 ; }
76
-
77
- // View the `len` units before the terminator as a slice and convert them.
78
- let ptr = ptr as * const u16 ;
79
- let buf = slice:: from_raw_parts ( ptr, len as usize ) ;
80
- OsStringExt :: from_wide ( buf)
81
- }
82
-
83
200
// Forward iteration over the arguments.
// Removed version: advanced `range` and converted the pointed-to string on each call.
// Added version: `next` pops the front of `parsed_args_list`, and `size_hint` reports
// the current queue length as both the lower and the upper bound.
impl Iterator for Args {
84
201
type Item = OsString ;
85
- fn next ( & mut self ) -> Option < OsString > {
86
- self . range . next ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
202
+ fn next ( & mut self ) -> Option < OsString > { self . parsed_args_list . pop_front ( ) }
203
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
204
+ ( self . parsed_args_list . len ( ) , Some ( self . parsed_args_list . len ( ) ) )
87
205
}
88
- fn size_hint ( & self ) -> ( usize , Option < usize > ) { self . range . size_hint ( ) }
89
206
}
90
207
91
208
// Reverse iteration over the arguments.
// Removed version: took the last index of `range` and converted that string.
// Added version: `next_back` pops the back of `parsed_args_list`.
impl DoubleEndedIterator for Args {
92
- fn next_back ( & mut self ) -> Option < OsString > {
93
- self . range . next_back ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
94
- }
209
+ fn next_back ( & mut self ) -> Option < OsString > { self . parsed_args_list . pop_back ( ) }
95
210
}
96
211
97
212
// Exact remaining length.
// Removed version: length of `range`. Added version: length of `parsed_args_list`.
impl ExactSizeIterator for Args {
98
- fn len ( & self ) -> usize { self . range . len ( ) }
213
+ fn len ( & self ) -> usize { self . parsed_args_list . len ( ) }
99
214
}
100
215
101
// This span interleaves the removed `Drop` impl (which passed `cur` to `LocalFree`)
// with the added unit-test module for `parse_lp_cmd_line`; the two closing braces at
// the end are shared by both sides of the diff.
- impl Drop for Args {
102
- fn drop ( & mut self ) {
103
- // self.cur can be null if CommandLineToArgvW previously failed,
104
- // but LocalFree ignores NULL pointers
105
- unsafe { c:: LocalFree ( self . cur as * mut c_void ) ; }
216
+ #[ cfg( test) ]
217
+ mod tests {
218
+ use sys:: windows:: args:: * ;
219
+ use ffi:: OsString ;
220
+
221
// Test helper: encodes `string` as UTF-16, appends a 0 terminator, parses it with
// `parse_lp_cmd_line` (using TEST.EXE as the fallback executable name) and asserts
// that the parsed arguments equal `parts`.
+ fn chk ( string : & str , parts : & [ & str ] ) {
222
+ let mut wide: Vec < u16 > = OsString :: from ( string) . encode_wide ( ) . collect ( ) ;
223
+ wide. push ( 0 ) ;
224
+ let parsed = unsafe {
225
+ parse_lp_cmd_line ( wide. as_ptr ( ) as * const u16 , || OsString :: from ( "TEST.EXE" ) )
226
+ } ;
227
+ let expected: Vec < OsString > = parts. iter ( ) . map ( |k| OsString :: from ( k) ) . collect ( ) ;
228
+ assert_eq ! ( parsed, expected) ;
229
+ }
230
+
231
// An empty command line, or one whose first unit is 0, yields only the fallback name.
+ #[ test]
232
+ fn empty ( ) {
233
+ chk ( "" , & [ "TEST.EXE" ] ) ;
234
+ chk ( "\0 " , & [ "TEST.EXE" ] ) ;
235
+ }
236
+
237
// Unquoted single arguments, including ones that need UTF-16 surrogate pairs.
+ #[ test]
238
+ fn single_words ( ) {
239
+ chk ( "EXE one_word" , & [ "EXE" , "one_word" ] ) ;
240
+ chk ( "EXE a" , & [ "EXE" , "a" ] ) ;
241
+ chk ( "EXE 😅" , & [ "EXE" , "😅" ] ) ;
242
+ chk ( "EXE 😅🤦" , & [ "EXE" , "😅🤦" ] ) ;
243
+ }
244
+
245
// Backslash and quote combinations; the test name suggests these mirror the examples
// in the Microsoft documentation linked from `parse_lp_cmd_line` (TODO confirm).
+ #[ test]
246
+ fn official_examples ( ) {
247
+ chk ( r#"EXE "abc" d e"# , & [ "EXE" , "abc" , "d" , "e" ] ) ;
248
+ chk ( r#"EXE a\\\b d"e f"g h"# , & [ "EXE" , r#"a\\\b"# , "de fg" , "h" ] ) ;
249
+ chk ( r#"EXE a\\\"b c d"# , & [ "EXE" , r#"a\"b"# , "c" , "d" ] ) ;
250
+ chk ( r#"EXE a\\\\"b c" d e"# , & [ "EXE" , r#"a\\b c"# , "d" , "e" ] ) ;
251
+ }
252
+
253
// Leading whitespace produces an empty first argument; trailing whitespace is ignored.
// NOTE(review): several inputs below are pairwise identical, which suggests that runs
// of whitespace inside the literals were collapsed when this listing was produced;
// confirm the exact literals against the upstream file before relying on them.
+ #[ test]
254
+ fn whitespace_behavior ( ) {
255
+ chk ( r#" test"# , & [ "" , "test" ] ) ;
256
+ chk ( r#" test"# , & [ "" , "test" ] ) ;
257
+ chk ( r#" test test2"# , & [ "" , "test" , "test2" ] ) ;
258
+ chk ( r#" test test2"# , & [ "" , "test" , "test2" ] ) ;
259
+ chk ( r#"test test2 "# , & [ "test" , "test2" ] ) ;
260
+ chk ( r#"test test2 "# , & [ "test" , "test2" ] ) ;
261
+ chk ( r#"test "# , & [ "test" ] ) ;
262
+ }
263
+
264
// Empty quoted arguments and doubled quotes that yield a literal quote character.
+ #[ test]
265
+ fn genius_quotes ( ) {
266
+ chk ( r#"EXE "" """# , & [ "EXE" , "" , "" ] ) ;
267
+ chk ( r#"EXE "" """"# , & [ "EXE" , "" , "\" " ] ) ;
268
+ chk (
269
+ r#"EXE "this is """all""" in the same argument""# ,
270
+ & [ "EXE" , "this is \" all\" in the same argument" ]
271
+ ) ;
272
+ chk ( r#"EXE "\u{1}"""# , & [ "EXE" , "\u{1} \" " ] ) ;
273
+ chk ( r#"EXE "a"" a"# , & [ "EXE" , "a\" " , "a" ] ) ;
106
274
}
107
275
}
0 commit comments