@@ -260,12 +260,18 @@ def __init__(
260
260
boundary : bytes ,
261
261
headers : "CIMultiDictProxy[str]" ,
262
262
content : StreamReader ,
263
+ * ,
264
+ subtype : str = "mixed" ,
265
+ default_charset : Optional [str ] = None ,
263
266
) -> None :
264
267
self .headers = headers
265
268
self ._boundary = boundary
266
269
self ._content = content
270
+ self ._default_charset = default_charset
267
271
self ._at_eof = False
268
- length = self .headers .get (CONTENT_LENGTH , None )
272
+ self ._is_form_data = subtype == "form-data"
273
+ # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
274
+ length = None if self ._is_form_data else self .headers .get (CONTENT_LENGTH , None )
269
275
self ._length = int (length ) if length is not None else None
270
276
self ._read_bytes = 0
271
277
self ._unread : Deque [bytes ] = deque ()
@@ -357,6 +363,8 @@ async def _read_chunk_from_length(self, size: int) -> bytes:
357
363
assert self ._length is not None , "Content-Length required for chunked read"
358
364
chunk_size = min (size , self ._length - self ._read_bytes )
359
365
chunk = await self ._content .read (chunk_size )
366
+ if self ._content .at_eof ():
367
+ self ._at_eof = True
360
368
return chunk
361
369
362
370
async def _read_chunk_from_stream (self , size : int ) -> bytes :
@@ -477,7 +485,8 @@ def decode(self, data: bytes) -> bytes:
477
485
"""
478
486
if CONTENT_TRANSFER_ENCODING in self .headers :
479
487
data = self ._decode_content_transfer (data )
480
- if CONTENT_ENCODING in self .headers :
488
+ # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
489
+ if not self ._is_form_data and CONTENT_ENCODING in self .headers :
481
490
return self ._decode_content (data )
482
491
return data
483
492
@@ -511,7 +520,7 @@ def get_charset(self, default: str) -> str:
511
520
"""Returns charset parameter from Content-Type header or default."""
512
521
ctype = self .headers .get (CONTENT_TYPE , "" )
513
522
mimetype = parse_mimetype (ctype )
514
- return mimetype .parameters .get ("charset" , default )
523
+ return mimetype .parameters .get ("charset" , self . _default_charset or default )
515
524
516
525
@reify
517
526
def name (self ) -> Optional [str ]:
@@ -570,9 +579,17 @@ def __init__(
570
579
headers : Mapping [str , str ],
571
580
content : StreamReader ,
572
581
) -> None :
582
+ self ._mimetype = parse_mimetype (headers [CONTENT_TYPE ])
583
+ assert self ._mimetype .type == "multipart" , "multipart/* content type expected"
584
+ if "boundary" not in self ._mimetype .parameters :
585
+ raise ValueError (
586
+ "boundary missed for Content-Type: %s" % headers [CONTENT_TYPE ]
587
+ )
588
+
573
589
self .headers = headers
574
590
self ._boundary = ("--" + self ._get_boundary ()).encode ()
575
591
self ._content = content
592
+ self ._default_charset : Optional [str ] = None
576
593
self ._last_part : Optional [Union ["MultipartReader" , BodyPartReader ]] = None
577
594
self ._at_eof = False
578
595
self ._at_bof = True
@@ -624,7 +641,24 @@ async def next(
624
641
await self ._read_boundary ()
625
642
if self ._at_eof : # we just read the last boundary, nothing to do there
626
643
return None
627
- self ._last_part = await self .fetch_next_part ()
644
+
645
+ part = await self .fetch_next_part ()
646
+ # https://datatracker.ietf.org/doc/html/rfc7578#section-4.6
647
+ if (
648
+ self ._last_part is None
649
+ and self ._mimetype .subtype == "form-data"
650
+ and isinstance (part , BodyPartReader )
651
+ ):
652
+ _ , params = parse_content_disposition (part .headers .get (CONTENT_DISPOSITION ))
653
+ if params .get ("name" ) == "_charset_" :
654
+ # Longest encoding in https://encoding.spec.whatwg.org/encodings.json
655
+ # is 19 characters, so 32 should be more than enough for any valid encoding.
656
+ charset = await part .read_chunk (32 )
657
+ if len (charset ) > 31 :
658
+ raise RuntimeError ("Invalid default charset" )
659
+ self ._default_charset = charset .strip ().decode ()
660
+ part = await self .fetch_next_part ()
661
+ self ._last_part = part
628
662
return self ._last_part
629
663
630
664
async def release (self ) -> None :
@@ -660,19 +694,16 @@ def _get_part_reader(
660
694
return type (self )(headers , self ._content )
661
695
return self .multipart_reader_cls (headers , self ._content )
662
696
else :
663
- return self .part_reader_cls (self ._boundary , headers , self ._content )
664
-
665
- def _get_boundary (self ) -> str :
666
- mimetype = parse_mimetype (self .headers [CONTENT_TYPE ])
667
-
668
- assert mimetype .type == "multipart" , "multipart/* content type expected"
669
-
670
- if "boundary" not in mimetype .parameters :
671
- raise ValueError (
672
- "boundary missed for Content-Type: %s" % self .headers [CONTENT_TYPE ]
697
+ return self .part_reader_cls (
698
+ self ._boundary ,
699
+ headers ,
700
+ self ._content ,
701
+ subtype = self ._mimetype .subtype ,
702
+ default_charset = self ._default_charset ,
673
703
)
674
704
675
- boundary = mimetype .parameters ["boundary" ]
705
+ def _get_boundary (self ) -> str :
706
+ boundary = self ._mimetype .parameters ["boundary" ]
676
707
if len (boundary ) > 70 :
677
708
raise ValueError ("boundary %r is too long (70 chars max)" % boundary )
678
709
@@ -765,6 +796,7 @@ def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> No
765
796
super ().__init__ (None , content_type = ctype )
766
797
767
798
self ._parts : List [_Part ] = []
799
+ self ._is_form_data = subtype == "form-data"
768
800
769
801
def __enter__ (self ) -> "MultipartWriter" :
770
802
return self
@@ -842,32 +874,36 @@ def append(self, obj: Any, headers: Optional[Mapping[str, str]] = None) -> Paylo
842
874
843
875
def append_payload(self, payload: Payload) -> Payload:
    """Adds a new body part to multipart writer.

    For ``multipart/form-data`` writers the part is validated rather than
    encoded: it must carry a Content-Disposition header with a ``name``
    parameter and must not carry Content-Encoding, Content-Length or
    Content-Transfer-Encoding headers (see RFC 7578 sections 4.7 and 4.8).

    For every other subtype the Content-Encoding and
    Content-Transfer-Encoding headers of the payload are checked against
    the supported values, and a Content-Length header is added when the
    payload size is known and no encoding is going to be applied.

    :param payload: the body part to add; its headers may be updated
        in place with a Content-Length value.
    :returns: the same ``payload`` object that was passed in.
    :raises AssertionError: if a form-data part is missing the
        Content-Disposition ``name`` parameter or carries a forbidden
        header.
    :raises RuntimeError: if a non-form-data part declares an unknown
        content encoding or content transfer encoding.
    """
    encoding: Optional[str] = None
    te_encoding: Optional[str] = None
    if self._is_form_data:
        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.7
        # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
        #
        # These checks are raised explicitly instead of using ``assert`` so
        # they are still enforced when Python runs with -O.  AssertionError
        # is kept as the exception type so existing callers observe the
        # same failure as before.
        if CONTENT_DISPOSITION not in payload.headers:
            raise AssertionError(
                "form-data part requires a Content-Disposition header"
            )

        # A plain substring test for "name=" would also be satisfied by
        # "filename=", so look at each ";"-separated parameter instead.
        # This is a lightweight check, not a full header parser.
        disposition = payload.headers[CONTENT_DISPOSITION]
        has_name = any(
            param.lstrip().startswith("name=") for param in disposition.split(";")
        )
        if not has_name:
            raise AssertionError(
                "form-data part requires a 'name' parameter in Content-Disposition"
            )

        forbidden = {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
        if forbidden & payload.headers.keys():
            raise AssertionError(
                "form-data part must not have Content-Encoding, Content-Length "
                "or Content-Transfer-Encoding headers"
            )
    else:
        # compression
        encoding = payload.headers.get(CONTENT_ENCODING, "").lower()
        if encoding and encoding not in ("deflate", "gzip", "identity"):
            raise RuntimeError(f"unknown content encoding: {encoding}")
        if encoding == "identity":
            # "identity" means no transformation, so treat it as unset.
            encoding = None

        # te encoding
        te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
        if te_encoding not in ("", "base64", "quoted-printable", "binary"):
            raise RuntimeError(f"unknown content transfer encoding: {te_encoding}")
        if te_encoding == "binary":
            # "binary" means the bytes are sent as-is, so treat it as unset.
            te_encoding = None

        # size
        # Content-Length is only set when no encoding is in effect; with an
        # encoding the header is left untouched.
        size = payload.size
        if size is not None and not (encoding or te_encoding):
            payload.headers[CONTENT_LENGTH] = str(size)

    self._parts.append((payload, encoding, te_encoding))  # type: ignore[arg-type]
    return payload
0 commit comments