3
3
from io import BufferedIOBase , BytesIO , RawIOBase
4
4
import os
5
5
from textwrap import fill
6
- from typing import Union
6
+ from typing import Any , Mapping , Union
7
7
8
8
from pandas ._config import config
9
9
10
10
from pandas ._libs .parsers import STR_NA_VALUES
11
+ from pandas ._typing import StorageOptions
11
12
from pandas .errors import EmptyDataError
12
13
from pandas .util ._decorators import Appender , deprecate_nonkeyword_arguments
13
14
199
200
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
200
201
'X'...'X'. Passing in False will cause data to be overwritten if there
201
202
are duplicate names in the columns.
203
+ storage_options : StorageOptions
204
+ Extra options that make sense for a particular storage connection, e.g.
205
+ host, port, username, password, etc., if using a URL that will
206
+ be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error
207
+ will be raised if providing this argument with a local path or
208
+ a file-like buffer. See the fsspec and backend storage implementation
209
+ docs for the set of allowed keys and values.
210
+
211
+ .. versionadded:: 1.2.0
202
212
203
213
Returns
204
214
-------
@@ -298,10 +308,11 @@ def read_excel(
298
308
skipfooter = 0 ,
299
309
convert_float = True ,
300
310
mangle_dupe_cols = True ,
311
+ storage_options : StorageOptions = None ,
301
312
):
302
313
303
314
if not isinstance (io , ExcelFile ):
304
- io = ExcelFile (io , engine = engine )
315
+ io = ExcelFile (io , storage_options = storage_options , engine = engine )
305
316
elif engine and engine != io .engine :
306
317
raise ValueError (
307
318
"Engine should not be specified when passing "
@@ -336,12 +347,14 @@ def read_excel(
336
347
337
348
338
349
class _BaseExcelReader (metaclass = abc .ABCMeta ):
339
- def __init__ (self , filepath_or_buffer ):
350
+ def __init__ (self , filepath_or_buffer , storage_options : StorageOptions = None ):
340
351
# If filepath_or_buffer is a url, load the data into a BytesIO
341
352
if is_url (filepath_or_buffer ):
342
353
filepath_or_buffer = BytesIO (urlopen (filepath_or_buffer ).read ())
343
354
elif not isinstance (filepath_or_buffer , (ExcelFile , self ._workbook_class )):
344
- filepath_or_buffer , _ , _ , _ = get_filepath_or_buffer (filepath_or_buffer )
355
+ filepath_or_buffer , _ , _ , _ = get_filepath_or_buffer (
356
+ filepath_or_buffer , storage_options = storage_options
357
+ )
345
358
346
359
if isinstance (filepath_or_buffer , self ._workbook_class ):
347
360
self .book = filepath_or_buffer
@@ -837,14 +850,16 @@ class ExcelFile:
837
850
from pandas .io .excel ._pyxlsb import _PyxlsbReader
838
851
from pandas .io .excel ._xlrd import _XlrdReader
839
852
840
- _engines = {
853
+ _engines : Mapping [ str , Any ] = {
841
854
"xlrd" : _XlrdReader ,
842
855
"openpyxl" : _OpenpyxlReader ,
843
856
"odf" : _ODFReader ,
844
857
"pyxlsb" : _PyxlsbReader ,
845
858
}
846
859
847
- def __init__ (self , path_or_buffer , engine = None ):
860
+ def __init__ (
861
+ self , path_or_buffer , engine = None , storage_options : StorageOptions = None
862
+ ):
848
863
if engine is None :
849
864
engine = "xlrd"
850
865
if isinstance (path_or_buffer , (BufferedIOBase , RawIOBase )):
@@ -858,13 +873,14 @@ def __init__(self, path_or_buffer, engine=None):
858
873
raise ValueError (f"Unknown engine: { engine } " )
859
874
860
875
self .engine = engine
876
+ self .storage_options = storage_options
861
877
862
878
# Could be a str, ExcelFile, Book, etc.
863
879
self .io = path_or_buffer
864
880
# Always a string
865
881
self ._io = stringify_path (path_or_buffer )
866
882
867
- self ._reader = self ._engines [engine ](self ._io )
883
+ self ._reader = self ._engines [engine ](self ._io , storage_options = storage_options )
868
884
869
885
def __fspath__ (self ):
870
886
return self ._io
0 commit comments