@@ -640,7 +640,7 @@ def get_handle(
640
640
.. versionchanged:: 1.4.0 Zstandard support.
641
641
642
642
memory_map : bool, default False
643
- See parsers._parser_params for more information.
643
+ See parsers._parser_params for more information. Only used by read_csv.
644
644
is_text : bool, default True
645
645
Whether the type of the content passed to the file/buffer is string or
646
646
bytes. This is not the same as `"b" not in mode`. If a string content is
@@ -659,6 +659,8 @@ def get_handle(
659
659
# Windows does not default to utf-8. Set to utf-8 for a consistent behavior
660
660
encoding = encoding or "utf-8"
661
661
662
+ errors = errors or "strict"
663
+
662
664
# read_csv does not know whether the buffer is opened in binary/text mode
663
665
if _is_binary_mode (path_or_buf , mode ) and "b" not in mode :
664
666
mode += "b"
@@ -681,6 +683,7 @@ def get_handle(
681
683
handles : list [BaseBuffer ]
682
684
683
685
# memory mapping needs to be the first step
686
+ # only used for read_csv
684
687
handle , memory_map , handles = _maybe_memory_map (
685
688
handle ,
686
689
memory_map ,
@@ -1064,7 +1067,7 @@ def closed(self):
1064
1067
return self .fp is None
1065
1068
1066
1069
1067
- class _MMapWrapper (abc .Iterator ):
1070
+ class _CSVMMapWrapper (abc .Iterator ):
1068
1071
"""
1069
1072
Wrapper for Python's mmap class so that it can be properly read in
1070
1073
by Python's csv.reader class.
@@ -1079,7 +1082,7 @@ class _MMapWrapper(abc.Iterator):
1079
1082
1080
1083
def __init__ (
1081
1084
self ,
1082
- f : IO ,
1085
+ f : ReadBuffer [ bytes ] ,
1083
1086
encoding : str = "utf-8" ,
1084
1087
errors : str = "strict" ,
1085
1088
decode : bool = True ,
@@ -1089,19 +1092,21 @@ def __init__(
1089
1092
self .decoder = codecs .getincrementaldecoder (encoding )(errors = errors )
1090
1093
self .decode = decode
1091
1094
1095
+ # needed for compression libraries and TextIOWrapper
1092
1096
self .attributes = {}
1093
1097
for attribute in ("seekable" , "readable" ):
1094
1098
if not hasattr (f , attribute ):
1095
1099
continue
1096
1100
self .attributes [attribute ] = getattr (f , attribute )()
1101
+
1097
1102
self .mmap = mmap .mmap (f .fileno (), 0 , access = mmap .ACCESS_READ )
1098
1103
1099
1104
def __getattr__ (self , name : str ):
1100
1105
if name in self .attributes :
1101
1106
return lambda : self .attributes [name ]
1102
1107
return getattr (self .mmap , name )
1103
1108
1104
- def __iter__ (self ) -> _MMapWrapper :
1109
+ def __iter__ (self ) -> _CSVMMapWrapper :
1105
1110
return self
1106
1111
1107
1112
def read (self , size : int = - 1 ) -> str | bytes :
@@ -1196,7 +1201,7 @@ def _maybe_memory_map(
1196
1201
memory_map : bool ,
1197
1202
encoding : str ,
1198
1203
mode : str ,
1199
- errors : str | None ,
1204
+ errors : str ,
1200
1205
decode : bool ,
1201
1206
) -> tuple [str | BaseBuffer , bool , list [BaseBuffer ]]:
1202
1207
"""Try to memory map file/buffer."""
@@ -1207,25 +1212,22 @@ def _maybe_memory_map(
1207
1212
1208
1213
# need to open the file first
1209
1214
if isinstance (handle , str ):
1210
- if encoding and "b" not in mode :
1211
- # Encoding
1212
- handle = open (handle , mode , encoding = encoding , errors = errors , newline = "" )
1213
- else :
1214
- # Binary mode
1215
- handle = open (handle , mode )
1215
+ handle = open (handle , "rb" )
1216
1216
handles .append (handle )
1217
1217
1218
1218
# error: Argument 1 to "_MMapWrapper" has incompatible type "Union[IO[Any],
1219
1219
# RawIOBase, BufferedIOBase, TextIOBase, mmap]"; expected "IO[Any]"
1220
1220
try :
1221
+ # open mmap, adds *-able, and convert to string
1221
1222
wrapped = cast (
1222
1223
BaseBuffer ,
1223
- _MMapWrapper (handle , encoding , errors , decode ), # type: ignore[arg-type]
1224
+ _CSVMMapWrapper (handle , encoding , errors , decode ), # type: ignore[arg-type]
1224
1225
)
1225
1226
finally :
1226
1227
for handle in reversed (handles ):
1227
1228
# error: "BaseBuffer" has no attribute "close"
1228
1229
handle .close () # type: ignore[attr-defined]
1230
+ handles = []
1229
1231
handles .append (wrapped )
1230
1232
1231
1233
return wrapped , memory_map , handles
0 commit comments