7
7
)
8
8
import bz2
9
9
import codecs
10
- from collections import abc
11
10
import dataclasses
12
11
import functools
13
12
import gzip
@@ -103,7 +102,6 @@ class IOHandles(Generic[AnyStr]):
103
102
compression : CompressionDict
104
103
created_handles : list [IO [bytes ] | IO [str ]] = dataclasses .field (default_factory = list )
105
104
is_wrapped : bool = False
106
- is_mmap : bool = False
107
105
108
106
def close (self ) -> None :
109
107
"""
@@ -687,14 +685,7 @@ def get_handle(
687
685
688
686
# memory mapping needs to be the first step
689
687
# only used for read_csv
690
- handle , memory_map , handles = _maybe_memory_map (
691
- handle ,
692
- memory_map ,
693
- ioargs .encoding ,
694
- ioargs .mode ,
695
- errors ,
696
- ioargs .compression ["method" ] not in _supported_compressions ,
697
- )
688
+ handle , memory_map , handles = _maybe_memory_map (handle , memory_map )
698
689
699
690
is_path = isinstance (handle , str )
700
691
compression_args = dict (ioargs .compression )
@@ -841,12 +832,19 @@ def get_handle(
841
832
handle ,
842
833
encoding = ioargs .encoding ,
843
834
)
844
- elif is_text and (compression or _is_binary_mode (handle , ioargs .mode )):
835
+ elif is_text and (
836
+ compression or memory_map or _is_binary_mode (handle , ioargs .mode )
837
+ ):
838
+ if (
839
+ not hasattr (handle , "readable" )
840
+ or not hasattr (handle , "writable" )
841
+ or not hasattr (handle , "seekable" )
842
+ ):
843
+ handle = _IOWrapper (handle )
844
+ # error: Argument 1 to "TextIOWrapper" has incompatible type
845
+ # "_IOWrapper"; expected "IO[bytes]"
845
846
handle = TextIOWrapper (
846
- # error: Argument 1 to "TextIOWrapper" has incompatible type
847
- # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]";
848
- # expected "IO[bytes]"
849
- _IOWrapper (handle ), # type: ignore[arg-type]
847
+ handle , # type: ignore[arg-type]
850
848
encoding = ioargs .encoding ,
851
849
errors = errors ,
852
850
newline = "" ,
@@ -877,7 +875,6 @@ def get_handle(
877
875
# "List[BaseBuffer]"; expected "List[Union[IO[bytes], IO[str]]]"
878
876
created_handles = handles , # type: ignore[arg-type]
879
877
is_wrapped = is_wrapped ,
880
- is_mmap = memory_map ,
881
878
compression = ioargs .compression ,
882
879
)
883
880
@@ -1001,75 +998,6 @@ def write_to_buffer(self) -> None:
1001
998
self .buffer .writestr (archive_name , self .getvalue ())
1002
999
1003
1000
1004
- class _CSVMMapWrapper (abc .Iterator ):
1005
- """
1006
- Wrapper for the Python's mmap class so that it can be properly read in
1007
- by Python's csv.reader class.
1008
-
1009
- Parameters
1010
- ----------
1011
- f : file object
1012
- File object to be mapped onto memory. Must support the 'fileno'
1013
- method or have an equivalent attribute
1014
-
1015
- """
1016
-
1017
- def __init__ (
1018
- self ,
1019
- f : ReadBuffer [bytes ],
1020
- encoding : str = "utf-8" ,
1021
- errors : str = "strict" ,
1022
- decode : bool = True ,
1023
- ) -> None :
1024
- self .encoding = encoding
1025
- self .errors = errors
1026
- self .decoder = codecs .getincrementaldecoder (encoding )(errors = errors )
1027
- self .decode = decode
1028
-
1029
- # needed for compression libraries and TextIOWrapper
1030
- self .attributes = {}
1031
- for attribute in ("seekable" , "readable" ):
1032
- if not hasattr (f , attribute ):
1033
- continue
1034
- self .attributes [attribute ] = getattr (f , attribute )()
1035
-
1036
- self .mmap = mmap .mmap (f .fileno (), 0 , access = mmap .ACCESS_READ )
1037
-
1038
- def __getattr__ (self , name : str ):
1039
- if name in self .attributes :
1040
- return lambda : self .attributes [name ]
1041
- return getattr (self .mmap , name )
1042
-
1043
- def __iter__ (self ) -> _CSVMMapWrapper :
1044
- return self
1045
-
1046
- def read (self , size : int = - 1 ) -> str | bytes :
1047
- # CSV c-engine uses read instead of iterating
1048
- content : bytes = self .mmap .read (size )
1049
- if self .decode and self .encoding != "utf-8" :
1050
- # memory mapping is applied before compression. Encoding should
1051
- # be applied to the de-compressed data.
1052
- final = size == - 1 or len (content ) < size
1053
- return self .decoder .decode (content , final = final )
1054
- return content
1055
-
1056
- def __next__ (self ) -> str :
1057
- newbytes = self .mmap .readline ()
1058
-
1059
- # readline returns bytes, not str, but Python's CSV reader
1060
- # expects str, so convert the output to str before continuing
1061
- newline = self .decoder .decode (newbytes )
1062
-
1063
- # mmap doesn't raise if reading past the allocated
1064
- # data but instead returns an empty string, so raise
1065
- # if that is returned
1066
- if newline == "" :
1067
- raise StopIteration
1068
-
1069
- # IncrementalDecoder seems to push newline to the next line
1070
- return newline .lstrip ("\n " )
1071
-
1072
-
1073
1001
class _IOWrapper :
1074
1002
# TextIOWrapper is overly strict: it requests that the buffer has seekable, readable,
1075
1003
# and writable. If we have a read-only buffer, we shouldn't need writable and vice
@@ -1131,12 +1059,7 @@ def read(self, n: int | None = -1) -> bytes:
1131
1059
1132
1060
1133
1061
def _maybe_memory_map (
1134
- handle : str | BaseBuffer ,
1135
- memory_map : bool ,
1136
- encoding : str ,
1137
- mode : str ,
1138
- errors : str ,
1139
- decode : bool ,
1062
+ handle : str | BaseBuffer , memory_map : bool
1140
1063
) -> tuple [str | BaseBuffer , bool , list [BaseBuffer ]]:
1141
1064
"""Try to memory map file/buffer."""
1142
1065
handles : list [BaseBuffer ] = []
@@ -1149,22 +1072,21 @@ def _maybe_memory_map(
1149
1072
handle = open (handle , "rb" )
1150
1073
handles .append (handle )
1151
1074
1152
- # error: Argument 1 to "_MMapWrapper" has incompatible type "Union[IO[Any],
1153
- # RawIOBase, BufferedIOBase, TextIOBase, mmap]"; expected "IO[Any]"
1154
1075
try :
1155
- # open mmap, adds *-able, and convert to string
1156
- wrapped = cast (
1157
- BaseBuffer ,
1158
- _CSVMMapWrapper (handle , encoding , errors , decode ), # type: ignore[arg-type]
1076
+ # open mmap and add *-able
1077
+ # error: Argument 1 to "_IOWrapper" has incompatible type "mmap";
1078
+ # expected "BaseBuffer"
1079
+ wrapped = _IOWrapper (
1080
+ mmap .mmap (
1081
+ handle .fileno (), 0 , access = mmap .ACCESS_READ # type: ignore[arg-type]
1082
+ )
1159
1083
)
1160
1084
finally :
1161
1085
for handle in reversed (handles ):
1162
1086
# error: "BaseBuffer" has no attribute "close"
1163
1087
handle .close () # type: ignore[attr-defined]
1164
- handles = []
1165
- handles .append (wrapped )
1166
1088
1167
- return wrapped , memory_map , handles
1089
+ return wrapped , memory_map , [ wrapped ]
1168
1090
1169
1091
1170
1092
def file_exists (filepath_or_buffer : FilePath | BaseBuffer ) -> bool :
0 commit comments