Skip to content

Commit 454092d

Browse files
Merge pull request from GHSA-gfw2-4jvh-wgfg
* Update Python parser for RFCs 9110/9112
* Add tests
* Update http_parser.py
* Update test_http_parser.py
* Update http_parser.py
* Update http_parser.py
* Update http_parser.py
* Update test_http_parser.py
* Update http_parser.py
* Update test_http_parser.py
* Update test_http_parser.py
* Update test_http_parser.py
* Update http_parser.py
* Name the duplicate header in error message
* Add docstring
* Update test_http_parser.py
* Concatenation

Co-authored-by: Sviatoslav Sydorenko <[email protected]>

* Cleanup bad version tests
* Fix bad_chunked test

---------

Co-authored-by: Sviatoslav Sydorenko <[email protected]>
1 parent 19ffc64 commit 454092d

File tree

2 files changed

+119
-40
lines changed

2 files changed

+119
-40
lines changed

aiohttp/http_parser.py

+40-35
Original file line number | Diff line number | Diff line change
@@ -50,16 +50,16 @@
5050

5151
ASCIISET: Final[Set[str]] = set(string.printable)
5252

53-
# See https://tools.ietf.org/html/rfc7230#section-3.1.1
54-
# and https://tools.ietf.org/html/rfc7230#appendix-B
53+
# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
54+
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
5555
#
5656
# method = token
5757
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
5858
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
5959
# token = 1*tchar
6060
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
61-
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
62-
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
61+
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
62+
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
6363

6464

6565
class RawRequestMessage(NamedTuple):
@@ -131,8 +131,11 @@ def parse_headers(
131131
except ValueError:
132132
raise InvalidHeader(line) from None
133133

134-
bname = bname.strip(b" \t")
135-
bvalue = bvalue.lstrip()
134+
# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
135+
if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
136+
raise InvalidHeader(line)
137+
138+
bvalue = bvalue.lstrip(b" \t")
136139
if HDRRE.search(bname):
137140
raise InvalidHeader(bname)
138141
if len(bname) > self.max_field_size:
@@ -153,6 +156,7 @@ def parse_headers(
153156
# consume continuation lines
154157
continuation = line and line[0] in (32, 9) # (' ', '\t')
155158

159+
# Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
156160
if continuation:
157161
bvalue_lst = [bvalue]
158162
while continuation:
@@ -187,10 +191,14 @@ def parse_headers(
187191
str(header_length),
188192
)
189193

190-
bvalue = bvalue.strip()
194+
bvalue = bvalue.strip(b" \t")
191195
name = bname.decode("utf-8", "surrogateescape")
192196
value = bvalue.decode("utf-8", "surrogateescape")
193197

198+
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
199+
if "\n" in value or "\r" in value or "\x00" in value:
200+
raise InvalidHeader(bvalue)
201+
194202
headers.add(name, value)
195203
raw_headers.append((bname, bvalue))
196204

@@ -301,15 +309,12 @@ def get_content_length() -> Optional[int]:
301309
if length_hdr is None:
302310
return None
303311

304-
try:
305-
length = int(length_hdr)
306-
except ValueError:
312+
# Shouldn't allow +/- or other number formats.
313+
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
314+
if not length_hdr.strip(" \t").isdigit():
307315
raise InvalidHeader(CONTENT_LENGTH)
308316

309-
if length < 0:
310-
raise InvalidHeader(CONTENT_LENGTH)
311-
312-
return length
317+
return int(length_hdr)
313318

314319
length = get_content_length()
315320
# do not support old websocket spec
@@ -449,6 +454,15 @@ def parse_headers(
449454
upgrade = False
450455
chunked = False
451456

457+
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
458+
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
459+
singletons = (hdrs.CONTENT_LENGTH, hdrs.CONTENT_LOCATION, hdrs.CONTENT_RANGE,
460+
hdrs.CONTENT_TYPE, hdrs.ETAG, hdrs.HOST, hdrs.MAX_FORWARDS,
461+
hdrs.SERVER, hdrs.TRANSFER_ENCODING, hdrs.USER_AGENT)
462+
bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
463+
if bad_hdr is not None:
464+
raise BadHttpMessage("Duplicate '{}' header found.".format(bad_hdr))
465+
452466
# keep-alive
453467
conn = headers.get(hdrs.CONNECTION)
454468
if conn:
@@ -502,7 +516,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
502516
# request line
503517
line = lines[0].decode("utf-8", "surrogateescape")
504518
try:
505-
method, path, version = line.split(None, 2)
519+
method, path, version = line.split(maxsplit=2)
506520
except ValueError:
507521
raise BadStatusLine(line) from None
508522

@@ -516,14 +530,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
516530
raise BadStatusLine(method)
517531

518532
# version
519-
try:
520-
if version.startswith("HTTP/"):
521-
n1, n2 = version[5:].split(".", 1)
522-
version_o = HttpVersion(int(n1), int(n2))
523-
else:
524-
raise BadStatusLine(version)
525-
except Exception:
526-
raise BadStatusLine(version)
533+
match = VERSRE.match(version)
534+
if match is None:
535+
raise BadStatusLine(line)
536+
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
527537

528538
if method == "CONNECT":
529539
# authority-form,
@@ -590,12 +600,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
590600
def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
591601
line = lines[0].decode("utf-8", "surrogateescape")
592602
try:
593-
version, status = line.split(None, 1)
603+
version, status = line.split(maxsplit=1)
594604
except ValueError:
595605
raise BadStatusLine(line) from None
596606

597607
try:
598-
status, reason = status.split(None, 1)
608+
status, reason = status.split(maxsplit=1)
599609
except ValueError:
600610
reason = ""
601611

@@ -611,13 +621,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
611621
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
612622

613623
# The status code is a three-digit number
614-
try:
615-
status_i = int(status)
616-
except ValueError:
617-
raise BadStatusLine(line) from None
618-
619-
if status_i > 999:
624+
if len(status) != 3 or not status.isdigit():
620625
raise BadStatusLine(line)
626+
status_i = int(status)
621627

622628
# read headers
623629
(
@@ -751,14 +757,13 @@ def feed_data(
751757
else:
752758
size_b = chunk[:pos]
753759

754-
try:
755-
size = int(bytes(size_b), 16)
756-
except ValueError:
760+
if not size_b.isdigit():
757761
exc = TransferEncodingError(
758762
chunk[:pos].decode("ascii", "surrogateescape")
759763
)
760764
self.payload.set_exception(exc)
761-
raise exc from None
765+
raise exc
766+
size = int(bytes(size_b), 16)
762767

763768
chunk = chunk[pos + 2 :]
764769
if size == 0: # eof marker

tests/test_http_parser.py

+79-5
Original file line number | Diff line number | Diff line change
@@ -475,6 +475,71 @@ def test_invalid_name(parser: Any) -> None:
475475
parser.feed_data(text)
476476

477477

478+
def test_cve_2023_37276(parser: Any) -> None:
479+
text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
480+
with pytest.raises(http_exceptions.BadHttpMessage):
481+
parser.feed_data(text)
482+
483+
484+
@pytest.mark.parametrize(
485+
"hdr",
486+
(
487+
"Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
488+
"Content-Length: +256",
489+
"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
490+
"Bar: abc\ndef",
491+
"Baz: abc\x00def",
492+
"Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
493+
"Foo\t: bar",
494+
)
495+
)
496+
def test_bad_headers(parser: Any, hdr: str) -> None:
497+
text = "POST / HTTP/1.1\r\n{}\r\n\r\n".format(hdr).encode()
498+
with pytest.raises(http_exceptions.InvalidHeader):
499+
parser.feed_data(text)
500+
501+
502+
def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
503+
"""Test that invalid chunked encoding doesn't allow content-length to be used."""
504+
parser = HttpRequestParserPy(
505+
protocol,
506+
loop,
507+
2**16,
508+
max_line_size=8190,
509+
max_field_size=8190,
510+
)
511+
text = (b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
512+
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n")
513+
messages, upgrade, tail = parser.feed_data(text)
514+
assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)
515+
516+
517+
@pytest.mark.skipif(
518+
"HttpRequestParserC" not in dir(aiohttp.http_parser),
519+
reason="C based HTTP parser not available",
520+
)
521+
def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
522+
"""C parser behaves differently. Maybe we should align them later."""
523+
payload = b"GET1 /test HTTP/1.1\r\n\r\n"
524+
parser = HttpRequestParserC(
525+
protocol,
526+
loop,
527+
2**16,
528+
max_line_size=8190,
529+
max_field_size=8190,
530+
)
531+
text = (b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
532+
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n")
533+
with pytest.raises(http_exceptions.BadHttpMessage):
534+
parser.feed_data(text)
535+
536+
537+
def test_whitespace_before_header(parser: Any) -> None:
538+
text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
539+
with pytest.raises(http_exceptions.BadHttpMessage):
540+
parser.feed_data(text)
541+
542+
478543
@pytest.mark.parametrize("size", [40960, 8191])
479544
def test_max_header_field_size(parser: Any, size: Any) -> None:
480545
name = b"t" * size
@@ -656,6 +721,11 @@ def test_http_request_parser_bad_version(parser: Any) -> None:
656721
parser.feed_data(b"GET //get HT/11\r\n\r\n")
657722

658723

724+
def test_http_request_parser_bad_version_number(parser: Any) -> None:
725+
with pytest.raises(http_exceptions.BadHttpMessage):
726+
parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")
727+
728+
659729
@pytest.mark.parametrize("size", [40965, 8191])
660730
def test_http_request_max_status_line(parser: Any, size: Any) -> None:
661731
path = b"t" * (size - 5)
@@ -725,6 +795,11 @@ def test_http_response_parser_bad_version(response: Any) -> None:
725795
response.feed_data(b"HT/11 200 Ok\r\n\r\n")
726796

727797

798+
def test_http_response_parser_bad_version_number(response: Any) -> None:
799+
with pytest.raises(http_exceptions.BadHttpMessage):
800+
response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")
801+
802+
728803
def test_http_response_parser_no_reason(response: Any) -> None:
729804
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]
730805

@@ -755,19 +830,18 @@ def test_http_response_parser_bad(response: Any) -> None:
755830
response.feed_data(b"HTT/1\r\n\r\n")
756831

757832

758-
@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser")
759833
def test_http_response_parser_code_under_100(response: Any) -> None:
760-
msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
761-
assert msg.code == 99
834+
with pytest.raises(http_exceptions.BadStatusLine):
835+
response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")
762836

763837

764838
def test_http_response_parser_code_above_999(response: Any) -> None:
765-
with pytest.raises(http_exceptions.BadHttpMessage):
839+
with pytest.raises(http_exceptions.BadStatusLine):
766840
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")
767841

768842

769843
def test_http_response_parser_code_not_int(response: Any) -> None:
770-
with pytest.raises(http_exceptions.BadHttpMessage):
844+
with pytest.raises(http_exceptions.BadStatusLine):
771845
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
772846

773847

0 commit comments

Comments
 (0)