@@ -253,22 +253,27 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
253
253
parsed_userinfo != "" or parsed_host != "" or parsed_port is not None
254
254
)
255
255
validate_path (path , has_scheme = has_scheme , has_authority = has_authority )
256
- if has_authority :
256
+ if has_scheme or has_authority :
257
257
path = normalize_path (path )
258
258
259
259
# The GEN_DELIMS set is... : / ? # [ ] @
260
260
# These do not need to be percent-quoted unless they serve as delimiters for the
261
261
# specific component.
262
+ WHATWG_SAFE = '`{}%|^\\ "'
262
263
263
264
# For 'path' we need to drop ? and # from the GEN_DELIMS set.
264
- parsed_path : str = quote (path , safe = SUB_DELIMS + ":/[]@" )
265
+ parsed_path : str = quote (path , safe = SUB_DELIMS + WHATWG_SAFE + ":/[]@" )
265
266
# For 'query' we need to drop '#' from the GEN_DELIMS set.
266
267
parsed_query : str | None = (
267
- None if query is None else quote (query , safe = SUB_DELIMS + ":/?[]@" )
268
+ None
269
+ if query is None
270
+ else quote (query , safe = SUB_DELIMS + WHATWG_SAFE + ":/?[]@" )
268
271
)
269
272
# For 'fragment' we can include all of the GEN_DELIMS set.
270
273
parsed_fragment : str | None = (
271
- None if fragment is None else quote (fragment , safe = SUB_DELIMS + ":/?#[]@" )
274
+ None
275
+ if fragment is None
276
+ else quote (fragment , safe = SUB_DELIMS + WHATWG_SAFE + ":/?#[]@" )
272
277
)
273
278
274
279
# The parsed ASCII bytestrings are our canonical form.
@@ -321,7 +326,8 @@ def encode_host(host: str) -> str:
321
326
# From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
322
327
#
323
328
# reg-name = *( unreserved / pct-encoded / sub-delims )
324
- return quote (host .lower (), safe = SUB_DELIMS )
329
+ WHATWG_SAFE = '"`{}%|\\ '
330
+ return quote (host .lower (), safe = SUB_DELIMS + WHATWG_SAFE )
325
331
326
332
# IDNA hostnames
327
333
try :
@@ -369,19 +375,17 @@ def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
369
375
# must either be empty or begin with a slash ("/") character."
370
376
if path and not path .startswith ("/" ):
371
377
raise InvalidURL ("For absolute URLs, path must be empty or begin with '/'" )
372
- else :
378
+
379
+ if not has_scheme and not has_authority :
373
380
# If a URI does not contain an authority component, then the path cannot begin
374
381
# with two slash characters ("//").
375
382
if path .startswith ("//" ):
376
- raise InvalidURL (
377
- "URLs with no authority component cannot have a path starting with '//'"
378
- )
383
+ raise InvalidURL ("Relative URLs cannot have a path starting with '//'" )
384
+
379
385
# In addition, a URI reference (Section 4.1) may be a relative-path reference,
380
386
# in which case the first path segment cannot contain a colon (":") character.
381
- if path .startswith (":" ) and not has_scheme :
382
- raise InvalidURL (
383
- "URLs with no scheme component cannot have a path starting with ':'"
384
- )
387
+ if path .startswith (":" ):
388
+ raise InvalidURL ("Relative URLs cannot have a path starting with ':'" )
385
389
386
390
387
391
def normalize_path (path : str ) -> str :
0 commit comments