3
3
4
4
import asyncio
5
5
import re
6
- from typing import Any , List
6
+ from contextlib import nullcontext
7
+ from typing import Any , Dict , List
7
8
from unittest import mock
8
9
from urllib .parse import quote
9
10
@@ -168,11 +169,27 @@ def test_cve_2023_37276(parser: Any) -> None:
168
169
parser .feed_data (text )
169
170
170
171
172
@pytest.mark.parametrize(
    "rfc9110_5_6_2_token_delim",
    # A str is iterated character by character: one test case per delimiter.
    r'"(),/:;<=>?@[\]{}',
)
def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
    """Reject a header name that contains a token delimiter.

    Each delimiter character is spliced into the middle of the header name
    ``header``. Every case except ``:`` is expected to raise
    ``BadHttpMessage``.
    """
    # Lines must end in a real CRLF pair and the delimiter must sit directly
    # inside the name, otherwise the request exercises something else.
    text = (
        f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n"
    ).encode()
    expectation = pytest.raises(http_exceptions.BadHttpMessage)
    if rfc9110_5_6_2_token_delim == ":":
        # Inserting colon into header just splits name/value earlier.
        expectation = nullcontext()
    with expectation:
        parser.feed_data(text)
184
+
185
+
171
186
@pytest .mark .parametrize (
172
187
"hdr" ,
173
188
(
174
189
"Content-Length: -5" , # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
175
190
"Content-Length: +256" ,
191
+ "Content-Length: \N{superscript one} " ,
192
+ "Content-Length: \N{mathematical double-struck digit one} " ,
176
193
"Foo: abc\r def" , # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
177
194
"Bar: abc\n def" ,
178
195
"Baz: abc\x00 def" ,
@@ -265,6 +282,20 @@ def test_parse_headers_longline(parser: Any) -> None:
265
282
parser .feed_data (text )
266
283
267
284
285
def test_parse_unusual_request_line(parser: Any) -> None:
    """Parse a request line with an odd method, ``//`` path and HTTP/1.3.

    Asserts that exactly one message is produced and that method, path and
    version are reported as sent.
    """
    # Guard on the ``parser`` argument: this test receives no ``response``
    # value, so checking that name could never identify the implementation
    # and the test body would never run.
    # NOTE(review): assumes aiohttp.http_parser.HttpRequestParserPy is the
    # pure-Python class the ``parser`` fixture may yield -- confirm.
    from aiohttp.http_parser import HttpRequestParserPy

    if not isinstance(parser, HttpRequestParserPy):
        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    text = b"#smol //a HTTP/1.3\r\n\r\n"
    messages, upgrade, tail = parser.feed_data(text)
    assert len(messages) == 1
    msg, _ = messages[0]
    assert msg.compression is None
    assert not msg.upgrade
    assert msg.method == "#smol"
    assert msg.path == "//a"
    assert msg.version == (1, 3)
297
+
298
+
268
299
def test_parse (parser : Any ) -> None :
269
300
text = b"GET /test HTTP/1.1\r \n \r \n "
270
301
messages , upgrade , tail = parser .feed_data (text )
@@ -567,6 +598,45 @@ def test_headers_content_length_err_2(parser: Any) -> None:
567
598
parser .feed_data (text )
568
599
569
600
601
# Padding byte strings placed before/after a header name, mapped to the short
# label used to build the pytest ids. Every key must be distinct and must
# contain exactly the bytes its label names.
_pad: Dict[bytes, str] = {
    b"": "empty",
    # not a typo. Python likes triple zero (octal escape for a single NUL)
    b"\000": "NUL",
    b" ": "SP",
    b"  ": "SPSP",
    # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
    b"\n": "LF",
    b"\xa0": "NBSP",
    b"\t ": "TABSP",
}
612
+
613
+
614
@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
def test_invalid_header_spacing(
    parser: Any, pad1: bytes, pad2: bytes, hdr: bytes
) -> None:
    """Feed a header whose name is wrapped in every pre/post padding combination.

    The header line is ``pad1 + hdr + pad2 + b": value"``. Only the unpadded,
    non-empty name is expected to be accepted; every other combination is
    expected to raise ``BadHttpMessage``.
    """
    text = (b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n") % (pad1, hdr, pad2)
    expectation = pytest.raises(http_exceptions.BadHttpMessage)
    if pad1 == pad2 == b"" and hdr != b"":
        # one entry in param matrix is correct: non-empty name, not padded
        expectation = nullcontext()
    if pad1 == pad2 == hdr == b"":
        # Guard on the ``parser`` argument: this test receives no ``response``
        # value, so checking that name could never identify the implementation.
        # NOTE(review): assumes aiohttp.http_parser.HttpRequestParserPy is the
        # pure-Python class the ``parser`` fixture may yield -- confirm.
        from aiohttp.http_parser import HttpRequestParserPy

        if not isinstance(parser, HttpRequestParserPy):
            pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    with expectation:
        parser.feed_data(text)
630
+
631
+
632
def test_empty_header_name(parser: Any) -> None:
    """Expect ``BadHttpMessage`` for a header line with nothing before the colon."""
    # Guard on the ``parser`` argument: this test receives no ``response``
    # value, so checking that name could never identify the implementation
    # and the test body would never run.
    # NOTE(review): assumes aiohttp.http_parser.HttpRequestParserPy is the
    # pure-Python class the ``parser`` fixture may yield -- confirm.
    from aiohttp.http_parser import HttpRequestParserPy

    if not isinstance(parser, HttpRequestParserPy):
        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
    with pytest.raises(http_exceptions.BadHttpMessage):
        parser.feed_data(text)
638
+
639
+
570
640
def test_invalid_header (parser : Any ) -> None :
571
641
text = b"GET /test HTTP/1.1\r \n " b"test line\r \n \r \n "
572
642
with pytest .raises (http_exceptions .BadHttpMessage ):
@@ -689,6 +759,34 @@ def test_http_request_bad_status_line(parser: Any) -> None:
689
759
assert r"\n" not in exc_info .value .message
690
760
691
761
762
# Encoded non-ASCII "digit one" look-alikes mapped to the label used as the
# pytest id. Each key is the bare encoded character with no surrounding bytes.
_num: Dict[bytes, str] = {
    # dangerous: accepted by Python int()
    # unicodedata.category("\U0001D7D9") == 'Nd'
    "\N{mathematical double-struck digit one}".encode(): "utf8digit",
    # only added for interop tests, refused by Python int()
    # unicodedata.category("\U000000B9") == 'No'
    "\N{superscript one}".encode(): "utf8number",
    "\N{superscript one}".encode("latin-1"): "latin1number",
}
771
+
772
+
773
@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
def test_http_request_bad_status_line_number(
    parser: Any, nonascii_digit: bytes
) -> None:
    """Expect ``BadStatusLine`` when the HTTP minor version is a non-ASCII digit."""
    # The digit is followed directly by the CRLF CRLF terminator so that it is
    # the only irregular part of the request line.
    text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
    with pytest.raises(http_exceptions.BadStatusLine):
        parser.feed_data(text)
780
+
781
+
782
def test_http_request_bad_status_line_separator(parser: Any) -> None:
    """Expect ``BadStatusLine`` when the version separator is a multibyte character."""
    # single code point, old, multibyte NFKC, multibyte NFKD
    utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
    # The encoded ligature takes the place of the "." between major and minor.
    text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
    with pytest.raises(http_exceptions.BadStatusLine):
        parser.feed_data(text)
788
+
789
+
692
790
def test_http_request_bad_status_line_whitespace (parser : Any ) -> None :
693
791
text = b"GET\n /path\f HTTP/1.1\r \n \r \n "
694
792
with pytest .raises (http_exceptions .BadStatusLine ):
@@ -710,6 +808,31 @@ def test_http_request_upgrade(parser: Any) -> None:
710
808
assert tail == b"some raw data"
711
809
712
810
811
def test_http_request_parser_utf8_request_line(parser: Any) -> None:
    """Parse a request whose path and header value contain non-ASCII bytes.

    The path ends in bytes that do not form valid UTF-8; the assertions expect
    them to surface as lone surrogates (``\\udcXX``) in ``msg.path``. The header
    value is expected back without its leading and trailing spaces.
    """
    # Guard on the ``parser`` argument: this test receives no ``response``
    # value, so checking that name could never identify the implementation
    # and the test body would never run.
    # NOTE(review): assumes aiohttp.http_parser.HttpRequestParserPy is the
    # pure-Python class the ``parser`` fixture may yield -- confirm.
    from aiohttp.http_parser import HttpRequestParserPy

    if not isinstance(parser, HttpRequestParserPy):
        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    messages, upgrade, tail = parser.feed_data(
        # note the truncated unicode sequence
        b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
        # for easier grep: ASCII 0xA0 more commonly known as non-breaking space
        # note the leading and trailing spaces
        "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} "
        "\r\n\r\n".encode()
    )
    msg = messages[0][0]

    assert msg.method == "GET"
    assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
    assert msg.version == (1, 1)
    assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
    assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
    assert not msg.should_close
    assert msg.compression is None
    assert not msg.upgrade
    assert not msg.chunked
    assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
834
+
835
+
713
836
def test_http_request_parser_utf8 (parser : Any ) -> None :
714
837
text = "GET /path HTTP/1.1\r \n x-test:тест\r \n \r \n " .encode ()
715
838
messages , upgrade , tail = parser .feed_data (text )
@@ -759,9 +882,15 @@ def test_http_request_parser_two_slashes(parser: Any) -> None:
759
882
assert not msg .chunked
760
883
761
884
762
@pytest.mark.parametrize(
    "rfc9110_5_6_2_token_delim",
    # Iterating bytes yields ints; wrap each one back into a 1-byte string.
    [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
)
def test_http_request_parser_bad_method(
    parser: Any, rfc9110_5_6_2_token_delim: bytes
) -> None:
    """Expect ``BadStatusLine`` when the request line starts with a token delimiter."""
    with pytest.raises(http_exceptions.BadStatusLine):
        parser.feed_data(rfc9110_5_6_2_token_delim + b'ET " /get HTTP/1.1\r\n\r\n')
765
894
766
895
767
896
def test_http_request_parser_bad_version (parser : Any ) -> None :
@@ -979,6 +1108,14 @@ def test_http_response_parser_code_not_int(response: Any) -> None:
979
1108
response .feed_data (b"HTTP/1.1 ttt test\r \n \r \n " )
980
1109
981
1110
1111
@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
def test_http_response_parser_code_not_ascii(
    response: Any, nonascii_digit: bytes
) -> None:
    """Expect ``BadStatusLine`` when the status code ends in a non-ASCII digit."""
    with pytest.raises(http_exceptions.BadStatusLine):
        # "20" followed by the look-alike digit stands in for a 3-digit code.
        response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
1117
+
1118
+
982
1119
def test_http_request_chunked_payload (parser : Any ) -> None :
983
1120
text = b"GET /test HTTP/1.1\r \n " b"transfer-encoding: chunked\r \n \r \n "
984
1121
msg , payload = parser .feed_data (text )[0 ][0 ]
0 commit comments