3
3
import collections
4
4
import re
5
5
import string
6
+ import sys
6
7
import zlib
7
8
from contextlib import suppress
8
9
from enum import IntEnum
9
10
from typing import (
10
11
Any ,
12
+ ClassVar ,
11
13
Generic ,
12
14
List ,
13
15
NamedTuple ,
26
28
27
29
from . import hdrs
28
30
from .base_protocol import BaseProtocol
29
- from .helpers import NO_EXTENSIONS , BaseTimerContext
31
+ from .helpers import DEBUG , NO_EXTENSIONS , BaseTimerContext
30
32
from .http_exceptions import (
31
33
BadHttpMessage ,
32
34
BadStatusLine ,
41
43
from .streams import EMPTY_PAYLOAD , StreamReader
42
44
from .typedefs import Final , RawHeaders
43
45
46
+ if sys .version_info >= (3 , 8 ):
47
+ from typing import Literal
48
+ else :
49
+ from typing_extensions import Literal
50
+
44
51
try :
45
52
import brotli
46
53
58
65
"RawResponseMessage" ,
59
66
)
60
67
68
+ _SEP = Literal [b"\r \n " , b"\n " ]
69
+
61
70
ASCIISET : Final [Set [str ]] = set (string .printable )
62
71
63
72
# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
70
79
METHRE : Final [Pattern [str ]] = re .compile (r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+" )
71
80
# Matches the HTTP-version token: "HTTP/" DIGIT "." DIGIT (RFC 9112, section 2.3).
# The dot is escaped so that only a literal "." separates major and minor
# versions, and re.ASCII stops \d from matching non-ASCII Unicode digits.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
72
81
HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]" )
82
+ HEXDIGIT = re .compile (rb"[0-9a-fA-F]+" )
73
83
74
84
75
85
class RawRequestMessage (NamedTuple ):
@@ -173,7 +183,8 @@ def parse_headers(
173
183
# consume continuation lines
174
184
continuation = line and line [0 ] in (32 , 9 ) # (' ', '\t')
175
185
176
- # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
186
+ # Deprecated:
187
+ # https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
177
188
if continuation :
178
189
bvalue_lst = [bvalue ]
179
190
while continuation :
@@ -223,6 +234,8 @@ def parse_headers(
223
234
224
235
225
236
class HttpParser (abc .ABC , Generic [_MsgT ]):
237
+ lax : ClassVar [bool ] = False
238
+
226
239
def __init__ (
227
240
self ,
228
241
protocol : Optional [BaseProtocol ] = None ,
@@ -285,7 +298,7 @@ def feed_eof(self) -> Optional[_MsgT]:
285
298
def feed_data (
286
299
self ,
287
300
data : bytes ,
288
- SEP : bytes = b"\r \n " ,
301
+ SEP : _SEP = b"\r \n " ,
289
302
EMPTY : bytes = b"" ,
290
303
CONTENT_LENGTH : istr = hdrs .CONTENT_LENGTH ,
291
304
METH_CONNECT : str = hdrs .METH_CONNECT ,
@@ -309,13 +322,16 @@ def feed_data(
309
322
pos = data .find (SEP , start_pos )
310
323
# consume \r\n
311
324
if pos == start_pos and not self ._lines :
312
- start_pos = pos + 2
325
+ start_pos = pos + len ( SEP )
313
326
continue
314
327
315
328
if pos >= start_pos :
316
329
# line found
317
- self ._lines .append (data [start_pos :pos ])
318
- start_pos = pos + 2
330
+ line = data [start_pos :pos ]
331
+ if SEP == b"\n " : # For lax response parsing
332
+ line = line .rstrip (b"\r " )
333
+ self ._lines .append (line )
334
+ start_pos = pos + len (SEP )
319
335
320
336
# \r\n\r\n found
321
337
if self ._lines [- 1 ] == EMPTY :
@@ -332,7 +348,7 @@ def get_content_length() -> Optional[int]:
332
348
333
349
# Shouldn't allow +/- or other number formats.
334
350
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
335
- if not length_hdr .strip (" \t " ).isdigit ():
351
+ if not length_hdr .strip (" \t " ).isdecimal ():
336
352
raise InvalidHeader (CONTENT_LENGTH )
337
353
338
354
return int (length_hdr )
@@ -369,6 +385,7 @@ def get_content_length() -> Optional[int]:
369
385
readall = self .readall ,
370
386
response_with_body = self .response_with_body ,
371
387
auto_decompress = self ._auto_decompress ,
388
+ lax = self .lax ,
372
389
)
373
390
if not payload_parser .done :
374
391
self ._payload_parser = payload_parser
@@ -387,6 +404,7 @@ def get_content_length() -> Optional[int]:
387
404
compression = msg .compression ,
388
405
readall = True ,
389
406
auto_decompress = self ._auto_decompress ,
407
+ lax = self .lax ,
390
408
)
391
409
else :
392
410
if (
@@ -410,6 +428,7 @@ def get_content_length() -> Optional[int]:
410
428
readall = True ,
411
429
response_with_body = self .response_with_body ,
412
430
auto_decompress = self ._auto_decompress ,
431
+ lax = self .lax ,
413
432
)
414
433
if not payload_parser .done :
415
434
self ._payload_parser = payload_parser
@@ -432,7 +451,7 @@ def get_content_length() -> Optional[int]:
432
451
assert not self ._lines
433
452
assert self ._payload_parser is not None
434
453
try :
435
- eof , data = self ._payload_parser .feed_data (data [start_pos :])
454
+ eof , data = self ._payload_parser .feed_data (data [start_pos :], SEP )
436
455
except BaseException as exc :
437
456
if self .payload_exception is not None :
438
457
self ._payload_parser .payload .set_exception (
@@ -627,6 +646,20 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
627
646
Returns RawResponseMessage.
628
647
"""
629
648
649
+ # Lax mode should only be enabled on response parser.
650
+ lax = not DEBUG
651
+
652
+ def feed_data (
653
+ self ,
654
+ data : bytes ,
655
+ SEP : Optional [_SEP ] = None ,
656
+ * args : Any ,
657
+ ** kwargs : Any ,
658
+ ) -> Tuple [List [Tuple [RawResponseMessage , StreamReader ]], bool , bytes ]:
659
+ if SEP is None :
660
+ SEP = b"\r \n " if DEBUG else b"\n "
661
+ return super ().feed_data (data , SEP , * args , ** kwargs )
662
+
630
663
def parse_message (self , lines : List [bytes ]) -> RawResponseMessage :
631
664
line = lines [0 ].decode ("utf-8" , "surrogateescape" )
632
665
try :
@@ -651,7 +684,7 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
651
684
version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
652
685
653
686
# The status code is a three-digit number
654
- if len (status ) != 3 or not status .isdigit ():
687
+ if len (status ) != 3 or not status .isdecimal ():
655
688
raise BadStatusLine (line )
656
689
status_i = int (status )
657
690
@@ -693,13 +726,15 @@ def __init__(
693
726
readall : bool = False ,
694
727
response_with_body : bool = True ,
695
728
auto_decompress : bool = True ,
729
+ lax : bool = False ,
696
730
) -> None :
697
731
self ._length = 0
698
732
self ._type = ParseState .PARSE_NONE
699
733
self ._chunk = ChunkState .PARSE_CHUNKED_SIZE
700
734
self ._chunk_size = 0
701
735
self ._chunk_tail = b""
702
736
self ._auto_decompress = auto_decompress
737
+ self ._lax = lax
703
738
self .done = False
704
739
705
740
# payload decompression wrapper
@@ -751,7 +786,7 @@ def feed_eof(self) -> None:
751
786
)
752
787
753
788
def feed_data (
754
- self , chunk : bytes , SEP : bytes = b"\r \n " , CHUNK_EXT : bytes = b";"
789
+ self , chunk : bytes , SEP : _SEP = b"\r \n " , CHUNK_EXT : bytes = b";"
755
790
) -> Tuple [bool , bytes ]:
756
791
# Read specified amount of bytes
757
792
if self ._type == ParseState .PARSE_LENGTH :
@@ -788,17 +823,22 @@ def feed_data(
788
823
else :
789
824
size_b = chunk [:pos ]
790
825
791
- if not size_b .isdigit ():
826
+ if self ._lax : # Allow whitespace in lax mode.
827
+ size_b = size_b .strip ()
828
+
829
+ if not re .fullmatch (HEXDIGIT , size_b ):
792
830
exc = TransferEncodingError (
793
831
chunk [:pos ].decode ("ascii" , "surrogateescape" )
794
832
)
795
833
self .payload .set_exception (exc )
796
834
raise exc
797
835
size = int (bytes (size_b ), 16 )
798
836
799
- chunk = chunk [pos + 2 :]
837
+ chunk = chunk [pos + len ( SEP ) :]
800
838
if size == 0 : # eof marker
801
839
self ._chunk = ChunkState .PARSE_MAYBE_TRAILERS
840
+ if self ._lax and chunk .startswith (b"\r " ):
841
+ chunk = chunk [1 :]
802
842
else :
803
843
self ._chunk = ChunkState .PARSE_CHUNKED_CHUNK
804
844
self ._chunk_size = size
@@ -820,13 +860,15 @@ def feed_data(
820
860
self ._chunk_size = 0
821
861
self .payload .feed_data (chunk [:required ], required )
822
862
chunk = chunk [required :]
863
+ if self ._lax and chunk .startswith (b"\r " ):
864
+ chunk = chunk [1 :]
823
865
self ._chunk = ChunkState .PARSE_CHUNKED_CHUNK_EOF
824
866
self .payload .end_http_chunk_receiving ()
825
867
826
868
# toss the CRLF at the end of the chunk
827
869
if self ._chunk == ChunkState .PARSE_CHUNKED_CHUNK_EOF :
828
- if chunk [:2 ] == SEP :
829
- chunk = chunk [2 :]
870
+ if chunk [: len ( SEP ) ] == SEP :
871
+ chunk = chunk [len ( SEP ) :]
830
872
self ._chunk = ChunkState .PARSE_CHUNKED_SIZE
831
873
else :
832
874
self ._chunk_tail = chunk
@@ -836,11 +878,11 @@ def feed_data(
836
878
# we should get another \r\n otherwise
837
879
# trailers need to be skipped until \r\n\r\n
838
880
if self ._chunk == ChunkState .PARSE_MAYBE_TRAILERS :
839
- head = chunk [:2 ]
881
+ head = chunk [: len ( SEP ) ]
840
882
if head == SEP :
841
883
# end of stream
842
884
self .payload .feed_eof ()
843
- return True , chunk [2 :]
885
+ return True , chunk [len ( SEP ) :]
844
886
# Both CR and LF, or only LF may not be received yet. It is
845
887
# expected that CRLF or LF will be shown at the very first
846
888
# byte next time, otherwise trailers should come. The last
@@ -858,7 +900,7 @@ def feed_data(
858
900
if self ._chunk == ChunkState .PARSE_TRAILERS :
859
901
pos = chunk .find (SEP )
860
902
if pos >= 0 :
861
- chunk = chunk [pos + 2 :]
903
+ chunk = chunk [pos + len ( SEP ) :]
862
904
self ._chunk = ChunkState .PARSE_MAYBE_TRAILERS
863
905
else :
864
906
self ._chunk_tail = chunk
0 commit comments