@@ -48,7 +48,7 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i,
48
48
// them.
49
49
nonHostChars = [ '%' , '/' , '?' , ';' , '#' ]
50
50
. concat ( unwise ) . concat ( autoEscape ) ,
51
- nonAuthChars = [ '/' , '@' , ' ?', '#' ] . concat ( delims ) ,
51
+ hostEndingChars = [ '/' , '?' , '#' ] ,
52
52
hostnameMaxLen = 255 ,
53
53
hostnamePartPattern = / ^ [ a - z 0 - 9 A - Z _ - ] { 0 , 63 } $ / ,
54
54
hostnamePartStart = / ^ ( [ a - z 0 - 9 A - Z _ - ] { 0 , 63 } ) ( .* ) $ / ,
@@ -90,8 +90,6 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i,
90
90
querystring = require ( 'querystring' ) ;
91
91
92
92
function urlParse ( url , parseQueryString , slashesDenoteHost ) {
93
- if ( url && typeof ( url ) === 'object' && url . href ) return url ;
94
-
95
93
if ( typeof url !== 'string' ) {
96
94
throw new TypeError ( "Parameter 'url' must be a string, not " + typeof url ) ;
97
95
}
@@ -125,57 +123,66 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
125
123
126
124
if ( ! hostlessProtocol [ proto ] &&
127
125
( slashes || ( proto && ! slashedProtocol [ proto ] ) ) ) {
126
+
128
127
// there's a hostname.
129
128
// the first instance of /, ?, ;, or # ends the host.
130
- // don't enforce full RFC correctness, just be unstupid about it.
131
-
129
+ //
132
130
// If there is an @ in the hostname, then non-host chars *are* allowed
133
- // to the left of the first @ sign, unless some non-auth character
131
+ // to the left of the last @ sign, unless some host-ending character
134
132
// comes *before* the @-sign.
135
133
// URLs are obnoxious.
136
- var atSign = rest . indexOf ( '@' ) ;
137
- if ( atSign !== - 1 ) {
138
- var auth = rest . slice ( 0 , atSign ) ;
139
-
140
- // there *may be* an auth
141
- var hasAuth = true ;
142
- for ( var i = 0 , l = nonAuthChars . length ; i < l ; i ++ ) {
143
- if ( auth . indexOf ( nonAuthChars [ i ] ) !== - 1 ) {
144
- // not a valid auth. Something like http://foo.com/bar@baz/
145
- hasAuth = false ;
146
- break ;
147
- }
148
- }
134
+ //
135
+ // ex:
136
+ // http://a@b@c/ => user:a@b host:c
137
+ // http://a@b?@c => user:a host:b path:/?@c
138
+
139
+ // v0.12 TODO(isaacs): This is not quite how Chrome does things.
140
+ // Review our test case against browsers more comprehensively.
141
+
142
+ // find the first instance of any hostEndingChars
143
+ var hostEnd = - 1 ;
144
+ for ( var i = 0 ; i < hostEndingChars . length ; i ++ ) {
145
+ var hec = rest . indexOf ( hostEndingChars [ i ] ) ;
146
+ if ( hec !== - 1 && ( hostEnd === - 1 || hec < hostEnd ) )
147
+ hostEnd = hec ;
148
+ }
149
149
150
- if ( hasAuth ) {
151
- // pluck off the auth portion.
152
- out . auth = decodeURIComponent ( auth ) ;
153
- rest = rest . substr ( atSign + 1 ) ;
154
- }
150
+ // at this point, either we have an explicit point where the
151
+ // auth portion cannot go past, or the last @ char is the decider.
152
+ var auth , atSign ;
153
+ if ( hostEnd === - 1 ) {
154
+ // atSign can be anywhere.
155
+ atSign = rest . lastIndexOf ( '@' ) ;
156
+ } else {
157
+ // atSign must be in auth portion.
158
+ // http://a@b/c@d => host:b auth:a path:/c@d
159
+ atSign = rest . lastIndexOf ( '@' , hostEnd ) ;
155
160
}
156
161
157
- var firstNonHost = - 1 ;
158
- for ( var i = 0 , l = nonHostChars . length ; i < l ; i ++ ) {
159
- var index = rest . indexOf ( nonHostChars [ i ] ) ;
160
- if ( index !== - 1 &&
161
- ( firstNonHost < 0 || index < firstNonHost ) ) firstNonHost = index ;
162
+ // Now we have a portion which is definitely the auth.
163
+ // Pull that off.
164
+ if ( atSign !== - 1 ) {
165
+ auth = rest . slice ( 0 , atSign ) ;
166
+ rest = rest . slice ( atSign + 1 ) ;
167
+ out . auth = decodeURIComponent ( auth ) ;
162
168
}
163
169
164
- if ( firstNonHost !== - 1 ) {
165
- out . host = rest . substr ( 0 , firstNonHost ) ;
166
- rest = rest . substr ( firstNonHost ) ;
167
- } else {
168
- out . host = rest ;
169
- rest = '' ;
170
+ // the host is whatever remains to the left of the first non-host char
171
+ hostEnd = - 1 ;
172
+ for ( var i = 0 ; i < nonHostChars . length ; i ++ ) {
173
+ var hec = rest . indexOf ( nonHostChars [ i ] ) ;
174
+ if ( hec !== - 1 && ( hostEnd === - 1 || hec < hostEnd ) )
175
+ hostEnd = hec ;
170
176
}
177
+ // if we still have not hit it, then the entire thing is a host.
178
+ if ( hostEnd === - 1 )
179
+ hostEnd = rest . length ;
180
+
181
+ out . host = rest . slice ( 0 , hostEnd ) ;
182
+ rest = rest . slice ( hostEnd ) ;
171
183
172
184
// pull out port.
173
- var p = parseHost ( out . host ) ;
174
- var keys = Object . keys ( p ) ;
175
- for ( var i = 0 , l = keys . length ; i < l ; i ++ ) {
176
- var key = keys [ i ] ;
177
- out [ key ] = p [ key ] ;
178
- }
185
+ parseHost ( out ) ;
179
186
180
187
// we've indicated that there is a hostname,
181
188
// so even if it's empty, it has to be present.
@@ -187,9 +194,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
187
194
out . hostname [ out . hostname . length - 1 ] === ']' ;
188
195
189
196
// validate a little.
190
- if ( out . hostname . length > hostnameMaxLen ) {
191
- out . hostname = '' ;
192
- } else if ( ! ipv6Hostname ) {
197
+ if ( ! ipv6Hostname ) {
193
198
var hostparts = out . hostname . split ( / \. / ) ;
194
199
for ( var i = 0 , l = hostparts . length ; i < l ; i ++ ) {
195
200
var part = hostparts [ i ] ;
@@ -225,8 +230,12 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
225
230
}
226
231
}
227
232
228
- // hostnames are always lower case.
229
- out . hostname = out . hostname . toLowerCase ( ) ;
233
+ if ( out . hostname . length > hostnameMaxLen ) {
234
+ out . hostname = '' ;
235
+ } else {
236
+ // hostnames are always lower case.
237
+ out . hostname = out . hostname . toLowerCase ( ) ;
238
+ }
230
239
231
240
if ( ! ipv6Hostname ) {
232
241
// IDNA Support: Returns a puny coded representation of "domain".
@@ -243,11 +252,13 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
243
252
out . hostname = newOut . join ( '.' ) ;
244
253
}
245
254
246
- out . host = ( out . hostname || '' ) +
247
- ( ( out . port ) ? ':' + out . port : '' ) ;
255
+ var p = out . port ? ':' + out . port : '' ;
256
+ var h = out . hostname || '' ;
257
+ out . host = h + p ;
248
258
out . href += out . host ;
249
259
250
260
// strip [ and ] from the hostname
261
+ // the host field still retains them, though
251
262
if ( ipv6Hostname ) {
252
263
out . hostname = out . hostname . substr ( 1 , out . hostname . length - 2 ) ;
253
264
if ( rest [ 0 ] !== '/' ) {
@@ -302,8 +313,9 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
302
313
303
314
//to support http.request
304
315
if ( out . pathname || out . search ) {
305
- out . path = ( out . pathname ? out . pathname : '' ) +
306
- ( out . search ? out . search : '' ) ;
316
+ var p = out . pathname || '' ;
317
+ var s = out . search || '' ;
318
+ out . path = p + s ;
307
319
}
308
320
309
321
// finally, reconstruct the href based on what has been validated.
@@ -332,9 +344,9 @@ function urlFormat(obj) {
332
344
host = false ,
333
345
query = '' ;
334
346
335
- if ( obj . host !== undefined ) {
347
+ if ( obj . host ) {
336
348
host = auth + obj . host ;
337
- } else if ( obj . hostname !== undefined ) {
349
+ } else if ( obj . hostname ) {
338
350
host = auth + ( obj . hostname . indexOf ( ':' ) === - 1 ?
339
351
obj . hostname :
340
352
'[' + obj . hostname + ']' ) ;
@@ -365,6 +377,11 @@ function urlFormat(obj) {
365
377
if ( hash && hash . charAt ( 0 ) !== '#' ) hash = '#' + hash ;
366
378
if ( search && search . charAt ( 0 ) !== '?' ) search = '?' + search ;
367
379
380
+ pathname = pathname . replace ( / [ ? # ] / g, function ( match ) {
381
+ return encodeURIComponent ( match ) ;
382
+ } ) ;
383
+ search = search . replace ( '#' , '%23' ) ;
384
+
368
385
return protocol + host + pathname + search + hash ;
369
386
}
370
387
@@ -610,16 +627,15 @@ function urlResolveObject(source, relative) {
610
627
return source ;
611
628
}
612
629
613
- function parseHost ( host ) {
614
- var out = { } ;
630
+ function parseHost ( obj ) {
631
+ var host = obj . host ;
615
632
var port = portPattern . exec ( host ) ;
616
633
if ( port ) {
617
634
port = port [ 0 ] ;
618
635
if ( port !== ':' ) {
619
- out . port = port . substr ( 1 ) ;
636
+ obj . port = port . substr ( 1 ) ;
620
637
}
621
638
host = host . substr ( 0 , host . length - port . length ) ;
622
639
}
623
- if ( host ) out . hostname = host ;
624
- return out ;
640
+ if ( host ) obj . hostname = host ;
625
641
}
0 commit comments