2
2
3
3
import asyncio
4
4
import re
5
- from typing import Any , List
5
+ from contextlib import nullcontext
6
+ from typing import Any , Dict , List
6
7
from unittest import mock
7
8
from urllib .parse import quote
8
9
@@ -169,11 +170,27 @@ def test_cve_2023_37276(parser: Any) -> None:
169
170
parser .feed_data (text )
170
171
171
172
173
@pytest.mark.parametrize(
    "rfc9110_5_6_2_token_delim",
    r'"(),/:;<=>?@[\]{}',
)
def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
    """Feed a request whose header name embeds one delimiter character.

    The parametrize argument is a plain string, so the test runs once per
    character in it.  ``feed_data`` is expected to raise ``BadHttpMessage``
    for every character except ``:``.
    """
    # Lines are CRLF-terminated and the name is not padded, so the injected
    # delimiter is the only irregularity in the message.
    text = (
        f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n"
    ).encode()
    if rfc9110_5_6_2_token_delim == ":":
        # Inserting colon into header just splits name/value earlier.
        expectation = nullcontext()
    else:
        expectation = pytest.raises(http_exceptions.BadHttpMessage)
    with expectation:
        parser.feed_data(text)
185
+
186
+
172
187
@pytest .mark .parametrize (
173
188
"hdr" ,
174
189
(
175
190
"Content-Length: -5" , # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
176
191
"Content-Length: +256" ,
192
+ "Content-Length: \N{superscript one} " ,
193
+ "Content-Length: \N{mathematical double-struck digit one} " ,
177
194
"Foo: abc\r def" , # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
178
195
"Bar: abc\n def" ,
179
196
"Baz: abc\x00 def" ,
@@ -266,6 +283,20 @@ def test_parse_headers_longline(parser: Any) -> None:
266
283
parser .feed_data (text )
267
284
268
285
286
def test_parse_unusual_request_line(parser) -> None:
    """Parse a request line with an unusual method, target and minor version.

    The assertions describe the pure-Python request parser; any other parser
    implementation is reported as an expected failure.
    """
    # The guard has to inspect the ``parser`` fixture this test receives.
    # NOTE(review): assumes HttpRequestParserPy is imported in this module
    # next to HttpResponseParserPy — confirm.
    if not isinstance(parser, HttpRequestParserPy):
        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    text = b"#smol //a HTTP/1.3\r\n\r\n"
    messages, upgrade, tail = parser.feed_data(text)
    assert len(messages) == 1
    msg, _ = messages[0]
    assert msg.compression is None
    assert not msg.upgrade
    assert msg.method == "#smol"
    assert msg.path == "//a"
    assert msg.version == (1, 3)
298
+
299
+
269
300
def test_parse (parser ) -> None :
270
301
text = b"GET /test HTTP/1.1\r \n \r \n "
271
302
messages , upgrade , tail = parser .feed_data (text )
@@ -568,6 +599,43 @@ def test_headers_content_length_err_2(parser) -> None:
568
599
parser .feed_data (text )
569
600
570
601
602
+ _pad : Dict [bytes , str ] = {
603
+ b"" : "empty" ,
604
+ # not a typo. Python likes triple zero
605
+ b"\000 " : "NUL" ,
606
+ b" " : "SP" ,
607
+ b" " : "SPSP" ,
608
+ # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
609
+ b"\n " : "LF" ,
610
+ b"\xa0 " : "NBSP" ,
611
+ b"\t " : "TABSP" ,
612
+ }
613
+
614
+
615
@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None:
    """Feed a header line whose name is surrounded by padding bytes.

    The header line is built as ``pad1 + hdr + pad2 + b": value"``.  Every
    combination is expected to raise ``BadHttpMessage`` except a non-empty
    name with no padding on either side.
    """
    text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2)
    expectation = pytest.raises(http_exceptions.BadHttpMessage)
    if pad1 == pad2 == b"" and hdr != b"":
        # one entry in param matrix is correct: non-empty name, not padded
        expectation = nullcontext()
    if pad1 == pad2 == hdr == b"":
        # Completely empty name: only the pure-Python parser is expected to
        # reject it, so the guard must look at the ``parser`` fixture.
        # NOTE(review): assumes HttpRequestParserPy is imported in this module
        # next to HttpResponseParserPy — confirm.
        if not isinstance(parser, HttpRequestParserPy):
            pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    with expectation:
        parser.feed_data(text)
629
+
630
+
631
def test_empty_header_name(parser) -> None:
    """Expect ``BadHttpMessage`` for a header line that starts with ``:``.

    The expectation describes the pure-Python request parser; any other
    parser implementation is reported as an expected failure.
    """
    # The guard has to inspect the ``parser`` fixture this test receives.
    # NOTE(review): assumes HttpRequestParserPy is imported in this module
    # next to HttpResponseParserPy — confirm.
    if not isinstance(parser, HttpRequestParserPy):
        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
    with pytest.raises(http_exceptions.BadHttpMessage):
        parser.feed_data(text)
637
+
638
+
571
639
def test_invalid_header (parser ) -> None :
572
640
text = b"GET /test HTTP/1.1\r \n " b"test line\r \n \r \n "
573
641
with pytest .raises (http_exceptions .BadHttpMessage ):
@@ -690,6 +758,34 @@ def test_http_request_bad_status_line(parser) -> None:
690
758
assert r"\n" not in exc_info .value .message
691
759
692
760
761
+ _num : Dict [bytes , str ] = {
762
+ # dangerous: accepted by Python int()
763
+ # unicodedata.category("\U0001D7D9") == 'Nd'
764
+ "\N{mathematical double-struck digit one} " .encode (): "utf8digit" ,
765
+ # only added for interop tests, refused by Python int()
766
+ # unicodedata.category("\U000000B9") == 'No'
767
+ "\N{superscript one} " .encode (): "utf8number" ,
768
+ "\N{superscript one} " .encode ("latin-1" ): "latin1number" ,
769
+ }
770
+
771
+
772
@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
def test_http_request_bad_status_line_number(
    parser: Any, nonascii_digit: bytes
) -> None:
    """Expect ``BadStatusLine`` when the HTTP minor version is a non-ASCII digit."""
    # The digit is followed directly by the CRLF pair so that it is the only
    # malformed part of the request line.
    text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
    with pytest.raises(http_exceptions.BadStatusLine):
        parser.feed_data(text)
779
+
780
+
781
def test_http_request_bad_status_line_separator(parser: Any) -> None:
    """Expect ``BadStatusLine`` when the version separator is not ``.``.

    The separator between major and minor version is replaced by the UTF-8
    encoding of a single non-ASCII code point.
    """
    # single code point, old, multibyte NFKC, multibyte NFKD
    utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
    text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
    with pytest.raises(http_exceptions.BadStatusLine):
        parser.feed_data(text)
787
+
788
+
693
789
def test_http_request_bad_status_line_whitespace (parser : Any ) -> None :
694
790
text = b"GET\n /path\f HTTP/1.1\r \n \r \n "
695
791
with pytest .raises (http_exceptions .BadStatusLine ):
@@ -711,6 +807,31 @@ def test_http_request_upgrade(parser: Any) -> None:
711
807
assert tail == b"some raw data"
712
808
713
809
810
def test_http_request_parser_utf8_request_line(parser) -> None:
    """Parse a request whose target and header value contain non-ASCII bytes.

    The request target ends in a truncated UTF-8 sequence; the assertions
    expect those bytes to show up as lone surrogates in ``msg.path``.  The
    header value is padded with spaces that are expected to be absent from
    both ``msg.headers`` and ``msg.raw_headers``.

    The assertions describe the pure-Python request parser; any other parser
    implementation is reported as an expected failure.
    """
    # The guard has to inspect the ``parser`` fixture this test receives.
    # NOTE(review): assumes HttpRequestParserPy is imported in this module
    # next to HttpResponseParserPy — confirm.
    if not isinstance(parser, HttpRequestParserPy):
        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
    messages, upgrade, tail = parser.feed_data(
        # note the truncated unicode sequence
        b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
        # for easier grep: ASCII 0xA0 more commonly known as non-breaking space
        # note the leading and trailing spaces
        "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} "
        "\r\n\r\n".encode()
    )
    msg = messages[0][0]

    assert msg.method == "GET"
    assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
    assert msg.version == (1, 1)
    assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
    assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
    assert not msg.should_close
    assert msg.compression is None
    assert not msg.upgrade
    assert not msg.chunked
    assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
833
+
834
+
714
835
def test_http_request_parser_utf8 (parser ) -> None :
715
836
text = "GET /path HTTP/1.1\r \n x-test:тест\r \n \r \n " .encode ()
716
837
messages , upgrade , tail = parser .feed_data (text )
@@ -760,9 +881,15 @@ def test_http_request_parser_two_slashes(parser) -> None:
760
881
assert not msg .chunked
761
882
762
883
763
@pytest.mark.parametrize(
    "rfc9110_5_6_2_token_delim",
    [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
)
def test_http_request_parser_bad_method(
    parser, rfc9110_5_6_2_token_delim: bytes
) -> None:
    """Expect ``BadStatusLine`` for a method that starts with a delimiter byte.

    Each byte of the parametrize literal is turned into a one-byte ``bytes``
    object and prepended to the rest of the request line.
    """
    # NOTE(review): the method is written without a space before the closing
    # double quote; confirm against the upstream test.
    with pytest.raises(http_exceptions.BadStatusLine):
        parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')
766
893
767
894
768
895
def test_http_request_parser_bad_version (parser ) -> None :
@@ -974,6 +1101,12 @@ def test_http_response_parser_code_not_int(response) -> None:
974
1101
response .feed_data (b"HTTP/1.1 ttt test\r \n \r \n " )
975
1102
976
1103
1104
@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None:
    """Expect ``BadStatusLine`` when the status code ends in a non-ASCII digit."""
    # Exactly one space separates the status code from the reason phrase, so
    # the non-ASCII digit is the only malformed part of the status line.
    with pytest.raises(http_exceptions.BadStatusLine):
        response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
1108
+
1109
+
977
1110
def test_http_request_chunked_payload (parser ) -> None :
978
1111
text = b"GET /test HTTP/1.1\r \n " b"transfer-encoding: chunked\r \n \r \n "
979
1112
msg , payload = parser .feed_data (text )[0 ][0 ]
0 commit comments