import mmap
import os
import pathlib
+from typing import IO, AnyStr, BinaryIO, Optional, TextIO, Type
from urllib.error import URLError  # noqa
from urllib.parse import (  # noqa
    urlencode,

from pandas.core.dtypes.common import is_file_like

+from pandas._typing import FilePathOrBuffer
+
# gh-12665: Alias for now and remove later.
CParserError = ParserError

@@ -68,14 +71,14 @@ class BaseIterator:
    Useful only when the object being iterated is non-reusable (e.g. OK for a
    parser, not for an in-memory table, yes for its iterator)."""

-    def __iter__(self):
+    def __iter__(self) -> "BaseIterator":
        return self

    def __next__(self):
        raise AbstractMethodError(self)


-def _is_url(url):
+def _is_url(url) -> bool:
    """Check to see if a URL has a valid protocol.

    Parameters
@@ -93,7 +96,9 @@ def _is_url(url):
        return False


-def _expand_user(filepath_or_buffer):
+def _expand_user(
+    filepath_or_buffer: FilePathOrBuffer[AnyStr]
+) -> FilePathOrBuffer[AnyStr]:
    """Return the argument with an initial component of ~ or ~user
    replaced by that user's home directory.

@@ -111,7 +116,7 @@ def _expand_user(filepath_or_buffer):
    return filepath_or_buffer


-def _validate_header_arg(header):
+def _validate_header_arg(header) -> None:
    if isinstance(header, bool):
        raise TypeError(
            "Passing a bool to header is invalid. "
@@ -121,7 +126,9 @@ def _validate_header_arg(header):
        )


-def _stringify_path(filepath_or_buffer):
+def _stringify_path(
+    filepath_or_buffer: FilePathOrBuffer[AnyStr]
+) -> FilePathOrBuffer[AnyStr]:
    """Attempt to convert a path-like object to a string.

    Parameters
@@ -144,21 +151,22 @@ def _stringify_path(filepath_or_buffer):
    strings, buffers, or anything else that's not even path-like.
    """
    if hasattr(filepath_or_buffer, "__fspath__"):
-        return filepath_or_buffer.__fspath__()
+        # https://github.com/python/mypy/issues/1424
+        return filepath_or_buffer.__fspath__()  # type: ignore
    elif isinstance(filepath_or_buffer, pathlib.Path):
        return str(filepath_or_buffer)
    return _expand_user(filepath_or_buffer)


-def is_s3_url(url):
+def is_s3_url(url) -> bool:
    """Check for an s3, s3n, or s3a url"""
    try:
        return parse_url(url).scheme in ["s3", "s3n", "s3a"]
    except Exception:
        return False


-def is_gcs_url(url):
+def is_gcs_url(url) -> bool:
    """Check for a gcs url"""
    try:
        return parse_url(url).scheme in ["gcs", "gs"]
@@ -167,7 +175,10 @@ def is_gcs_url(url):


def get_filepath_or_buffer(
-    filepath_or_buffer, encoding=None, compression=None, mode=None
+    filepath_or_buffer: FilePathOrBuffer,
+    encoding: Optional[str] = None,
+    compression: Optional[str] = None,
+    mode: Optional[str] = None,
):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
@@ -190,7 +201,7 @@ def get_filepath_or_buffer(
    """
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

-    if _is_url(filepath_or_buffer):
+    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
        req = urlopen(filepath_or_buffer)
        content_encoding = req.headers.get("Content-Encoding", None)
        if content_encoding == "gzip":
@@ -224,7 +235,7 @@ def get_filepath_or_buffer(
    return filepath_or_buffer, None, compression, False


-def file_path_to_url(path):
+def file_path_to_url(path: str) -> str:
    """
    converts an absolute native path to a FILE URL.

@@ -242,7 +253,9 @@ def file_path_to_url(path):
_compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"}


-def _infer_compression(filepath_or_buffer, compression):
+def _infer_compression(
+    filepath_or_buffer: FilePathOrBuffer, compression: Optional[str]
+) -> Optional[str]:
    """
    Get the compression method for filepath_or_buffer. If compression='infer',
    the inferred compression method is returned. Otherwise, the input
@@ -435,7 +448,13 @@ class BytesZipFile(zipfile.ZipFile, BytesIO):  # type: ignore
    """

    # GH 17778
-    def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
+    def __init__(
+        self,
+        file: FilePathOrBuffer,
+        mode: str,
+        compression: int = zipfile.ZIP_DEFLATED,
+        **kwargs
+    ):
        if mode in ["wb", "rb"]:
            mode = mode.replace("b", "")
        super().__init__(file, mode, compression, **kwargs)
@@ -461,16 +480,16 @@ class MMapWrapper(BaseIterator):

    """

-    def __init__(self, f):
+    def __init__(self, f: IO):
        self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

-    def __getattr__(self, name):
+    def __getattr__(self, name: str):
        return getattr(self.mmap, name)

-    def __iter__(self):
+    def __iter__(self) -> "MMapWrapper":
        return self

-    def __next__(self):
+    def __next__(self) -> str:
        newline = self.mmap.readline()

        # readline returns bytes, not str, but Python's CSV reader
@@ -491,16 +510,16 @@ class UTF8Recoder(BaseIterator):
    Iterator that reads an encoded stream and re-encodes the input to UTF-8
    """

-    def __init__(self, f, encoding):
+    def __init__(self, f: BinaryIO, encoding: str):
        self.reader = codecs.getreader(encoding)(f)

-    def read(self, bytes=-1):
+    def read(self, bytes: int = -1) -> bytes:
        return self.reader.read(bytes).encode("utf-8")

-    def readline(self):
+    def readline(self) -> bytes:
        return self.reader.readline().encode("utf-8")

-    def next(self):
+    def next(self) -> bytes:
        return next(self.reader).encode("utf-8")

@@ -511,5 +530,7 @@ def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
511
530
return csv .reader (f , dialect = dialect , ** kwds )
512
531
513
532
514
- def UnicodeWriter (f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
533
+ def UnicodeWriter (
534
+ f : TextIO , dialect : Type [csv .Dialect ] = csv .excel , encoding : str = "utf-8" , ** kwds
535
+ ):
515
536
return csv .writer (f , dialect = dialect , ** kwds )
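
Taken together, the new annotations spell out a simple contract for these helpers: path-like inputs are reduced to plain strings, buffers pass through unchanged, and encoding/compression stay optional. Below is a minimal usage sketch, not part of the patch, exercising the annotated internal helpers directly; the file names are invented for illustration.

# Hypothetical usage sketch (not part of the diff above).
import io
import os
import pathlib

from pandas.io.common import _infer_compression, _stringify_path, get_filepath_or_buffer

# Path-like objects come back as plain strings; buffers pass through unchanged,
# matching the FilePathOrBuffer[AnyStr] -> FilePathOrBuffer[AnyStr] annotation.
assert _stringify_path(pathlib.Path("data") / "trades.csv.gz") == os.path.join("data", "trades.csv.gz")

buf = io.StringIO("a,b\n1,2\n")
assert _stringify_path(buf) is buf

# compression="infer" keys off the filename extension; a buffer infers to None.
assert _infer_compression("data/trades.csv.gz", compression="infer") == "gzip"
assert _infer_compression(buf, compression="infer") is None

# For a plain local path the returned 4-tuple is (path, encoding, compression, should_close).
fp, encoding, compression, should_close = get_filepath_or_buffer("data/trades.csv.gz")
assert (fp, encoding, compression, should_close) == ("data/trades.csv.gz", None, None, False)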