Skip to content

Commit 7d0be3f

Browse files
Fix handling of multipart/form-data (#8280)
https://datatracker.ietf.org/doc/html/rfc7578
1 parent: cbc0c86 · commit: 7d0be3f

7 files changed

+151
-121
lines changed

CHANGES/8280.bugfix.rst

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fixed ``multipart/form-data`` compliance with :rfc:`7578` -- by :user:`Dreamsorcerer`.

CHANGES/8280.deprecation.rst

+2
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Deprecated ``content_transfer_encoding`` parameter in :py:meth:`FormData.add_field()
2+
<aiohttp.FormData.add_field>` -- by :user:`Dreamsorcerer`.

aiohttp/formdata.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import io
2+
import warnings
23
from typing import Any, Iterable, List, Optional
34
from urllib.parse import urlencode
45

@@ -54,7 +55,12 @@ def add_field(
5455
if isinstance(value, io.IOBase):
5556
self._is_multipart = True
5657
elif isinstance(value, (bytes, bytearray, memoryview)):
58+
msg = (
59+
"In v4, passing bytes will no longer create a file field. "
60+
"Please explicitly use the filename parameter or pass a BytesIO object."
61+
)
5762
if filename is None and content_transfer_encoding is None:
63+
warnings.warn(msg, DeprecationWarning)
5864
filename = name
5965

6066
type_options: MultiDict[str] = MultiDict({"name": name})
@@ -82,7 +88,11 @@ def add_field(
8288
"content_transfer_encoding must be an instance"
8389
" of str. Got: %s" % content_transfer_encoding
8490
)
85-
headers[hdrs.CONTENT_TRANSFER_ENCODING] = content_transfer_encoding
91+
msg = (
92+
"content_transfer_encoding is deprecated. "
93+
"To maintain compatibility with v4 please pass a BytesPayload."
94+
)
95+
warnings.warn(msg, DeprecationWarning)
8696
self._is_multipart = True
8797

8898
self._fields.append((type_options, headers, value))

aiohttp/multipart.py

+76-40
Original file line number | Diff line number | Diff line change
@@ -260,12 +260,18 @@ def __init__(
260260
boundary: bytes,
261261
headers: "CIMultiDictProxy[str]",
262262
content: StreamReader,
263+
*,
264+
subtype: str = "mixed",
265+
default_charset: Optional[str] = None,
263266
) -> None:
264267
self.headers = headers
265268
self._boundary = boundary
266269
self._content = content
270+
self._default_charset = default_charset
267271
self._at_eof = False
268-
length = self.headers.get(CONTENT_LENGTH, None)
272+
self._is_form_data = subtype == "form-data"
273+
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
274+
length = None if self._is_form_data else self.headers.get(CONTENT_LENGTH, None)
269275
self._length = int(length) if length is not None else None
270276
self._read_bytes = 0
271277
self._unread: Deque[bytes] = deque()
@@ -357,6 +363,8 @@ async def _read_chunk_from_length(self, size: int) -> bytes:
357363
assert self._length is not None, "Content-Length required for chunked read"
358364
chunk_size = min(size, self._length - self._read_bytes)
359365
chunk = await self._content.read(chunk_size)
366+
if self._content.at_eof():
367+
self._at_eof = True
360368
return chunk
361369

362370
async def _read_chunk_from_stream(self, size: int) -> bytes:
@@ -477,7 +485,8 @@ def decode(self, data: bytes) -> bytes:
477485
"""
478486
if CONTENT_TRANSFER_ENCODING in self.headers:
479487
data = self._decode_content_transfer(data)
480-
if CONTENT_ENCODING in self.headers:
488+
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
489+
if not self._is_form_data and CONTENT_ENCODING in self.headers:
481490
return self._decode_content(data)
482491
return data
483492

@@ -511,7 +520,7 @@ def get_charset(self, default: str) -> str:
511520
"""Returns charset parameter from Content-Type header or default."""
512521
ctype = self.headers.get(CONTENT_TYPE, "")
513522
mimetype = parse_mimetype(ctype)
514-
return mimetype.parameters.get("charset", default)
523+
return mimetype.parameters.get("charset", self._default_charset or default)
515524

516525
@reify
517526
def name(self) -> Optional[str]:
@@ -570,9 +579,17 @@ def __init__(
570579
headers: Mapping[str, str],
571580
content: StreamReader,
572581
) -> None:
582+
self._mimetype = parse_mimetype(headers[CONTENT_TYPE])
583+
assert self._mimetype.type == "multipart", "multipart/* content type expected"
584+
if "boundary" not in self._mimetype.parameters:
585+
raise ValueError(
586+
"boundary missed for Content-Type: %s" % headers[CONTENT_TYPE]
587+
)
588+
573589
self.headers = headers
574590
self._boundary = ("--" + self._get_boundary()).encode()
575591
self._content = content
592+
self._default_charset: Optional[str] = None
576593
self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
577594
self._at_eof = False
578595
self._at_bof = True
@@ -624,7 +641,24 @@ async def next(
624641
await self._read_boundary()
625642
if self._at_eof: # we just read the last boundary, nothing to do there
626643
return None
627-
self._last_part = await self.fetch_next_part()
644+
645+
part = await self.fetch_next_part()
646+
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.6
647+
if (
648+
self._last_part is None
649+
and self._mimetype.subtype == "form-data"
650+
and isinstance(part, BodyPartReader)
651+
):
652+
_, params = parse_content_disposition(part.headers.get(CONTENT_DISPOSITION))
653+
if params.get("name") == "_charset_":
654+
# Longest encoding in https://encoding.spec.whatwg.org/encodings.json
655+
# is 19 characters, so 32 should be more than enough for any valid encoding.
656+
charset = await part.read_chunk(32)
657+
if len(charset) > 31:
658+
raise RuntimeError("Invalid default charset")
659+
self._default_charset = charset.strip().decode()
660+
part = await self.fetch_next_part()
661+
self._last_part = part
628662
return self._last_part
629663

630664
async def release(self) -> None:
@@ -660,19 +694,16 @@ def _get_part_reader(
660694
return type(self)(headers, self._content)
661695
return self.multipart_reader_cls(headers, self._content)
662696
else:
663-
return self.part_reader_cls(self._boundary, headers, self._content)
664-
665-
def _get_boundary(self) -> str:
666-
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
667-
668-
assert mimetype.type == "multipart", "multipart/* content type expected"
669-
670-
if "boundary" not in mimetype.parameters:
671-
raise ValueError(
672-
"boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE]
697+
return self.part_reader_cls(
698+
self._boundary,
699+
headers,
700+
self._content,
701+
subtype=self._mimetype.subtype,
702+
default_charset=self._default_charset,
673703
)
674704

675-
boundary = mimetype.parameters["boundary"]
705+
def _get_boundary(self) -> str:
706+
boundary = self._mimetype.parameters["boundary"]
676707
if len(boundary) > 70:
677708
raise ValueError("boundary %r is too long (70 chars max)" % boundary)
678709

@@ -765,6 +796,7 @@ def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> No
765796
super().__init__(None, content_type=ctype)
766797

767798
self._parts: List[_Part] = []
799+
self._is_form_data = subtype == "form-data"
768800

769801
def __enter__(self) -> "MultipartWriter":
770802
return self
@@ -842,32 +874,36 @@ def append(self, obj: Any, headers: Optional[Mapping[str, str]] = None) -> Paylo
842874

843875
def append_payload(self, payload: Payload) -> Payload:
844876
"""Adds a new body part to multipart writer."""
845-
# compression
846-
encoding: Optional[str] = payload.headers.get(
847-
CONTENT_ENCODING,
848-
"",
849-
).lower()
850-
if encoding and encoding not in ("deflate", "gzip", "identity"):
851-
raise RuntimeError(f"unknown content encoding: {encoding}")
852-
if encoding == "identity":
853-
encoding = None
854-
855-
# te encoding
856-
te_encoding: Optional[str] = payload.headers.get(
857-
CONTENT_TRANSFER_ENCODING,
858-
"",
859-
).lower()
860-
if te_encoding not in ("", "base64", "quoted-printable", "binary"):
861-
raise RuntimeError(
862-
"unknown content transfer encoding: {}" "".format(te_encoding)
877+
encoding: Optional[str] = None
878+
te_encoding: Optional[str] = None
879+
if self._is_form_data:
880+
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.7
881+
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
882+
assert CONTENT_DISPOSITION in payload.headers
883+
assert "name=" in payload.headers[CONTENT_DISPOSITION]
884+
assert (
885+
not {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
886+
& payload.headers.keys()
863887
)
864-
if te_encoding == "binary":
865-
te_encoding = None
866-
867-
# size
868-
size = payload.size
869-
if size is not None and not (encoding or te_encoding):
870-
payload.headers[CONTENT_LENGTH] = str(size)
888+
else:
889+
# compression
890+
encoding = payload.headers.get(CONTENT_ENCODING, "").lower()
891+
if encoding and encoding not in ("deflate", "gzip", "identity"):
892+
raise RuntimeError(f"unknown content encoding: {encoding}")
893+
if encoding == "identity":
894+
encoding = None
895+
896+
# te encoding
897+
te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
898+
if te_encoding not in ("", "base64", "quoted-printable", "binary"):
899+
raise RuntimeError(f"unknown content transfer encoding: {te_encoding}")
900+
if te_encoding == "binary":
901+
te_encoding = None
902+
903+
# size
904+
size = payload.size
905+
if size is not None and not (encoding or te_encoding):
906+
payload.headers[CONTENT_LENGTH] = str(size)
871907

872908
self._parts.append((payload, encoding, te_encoding)) # type: ignore[arg-type]
873909
return payload

tests/test_client_functional.py

+1-45
Original file line number | Diff line number | Diff line change
@@ -1388,50 +1388,6 @@ async def handler(request):
13881388
resp.close()
13891389

13901390

1391-
async def test_POST_DATA_with_context_transfer_encoding(aiohttp_client: Any) -> None:
1392-
async def handler(request):
1393-
data = await request.post()
1394-
assert data["name"] == "text"
1395-
return web.Response(text=data["name"])
1396-
1397-
app = web.Application()
1398-
app.router.add_post("/", handler)
1399-
client = await aiohttp_client(app)
1400-
1401-
form = aiohttp.FormData()
1402-
form.add_field("name", "text", content_transfer_encoding="base64")
1403-
1404-
resp = await client.post("/", data=form)
1405-
assert 200 == resp.status
1406-
content = await resp.text()
1407-
assert content == "text"
1408-
resp.close()
1409-
1410-
1411-
async def test_POST_DATA_with_content_type_context_transfer_encoding(
1412-
aiohttp_client: Any,
1413-
):
1414-
async def handler(request):
1415-
data = await request.post()
1416-
assert data["name"] == "text"
1417-
return web.Response(body=data["name"])
1418-
1419-
app = web.Application()
1420-
app.router.add_post("/", handler)
1421-
client = await aiohttp_client(app)
1422-
1423-
form = aiohttp.FormData()
1424-
form.add_field(
1425-
"name", "text", content_type="text/plain", content_transfer_encoding="base64"
1426-
)
1427-
1428-
resp = await client.post("/", data=form)
1429-
assert 200 == resp.status
1430-
content = await resp.text()
1431-
assert content == "text"
1432-
resp.close()
1433-
1434-
14351391
async def test_POST_MultiDict(aiohttp_client: Any) -> None:
14361392
async def handler(request):
14371393
data = await request.post()
@@ -1483,7 +1439,7 @@ async def handler(request):
14831439

14841440
with fname.open("rb") as f:
14851441
async with client.post(
1486-
"/", data={"some": f, "test": b"data"}, chunked=True
1442+
"/", data={"some": f, "test": io.BytesIO(b"data")}, chunked=True
14871443
) as resp:
14881444
assert 200 == resp.status
14891445

0 commit comments

Comments
 (0)