14
14
from pandas import DataFrame , MultiIndex , get_option
15
15
from pandas .core import generic
16
16
17
- from pandas .io .common import IOHandles , get_handle , is_fsspec_url , stringify_path
17
+ from pandas .io .common import (
18
+ IOHandles ,
19
+ get_handle ,
20
+ is_fsspec_url ,
21
+ is_url ,
22
+ stringify_path ,
23
+ )
18
24
19
25
20
26
def get_engine (engine : str ) -> "BaseImpl" :
@@ -66,8 +72,10 @@ def _get_path_or_handle(
66
72
fs , path_or_handle = fsspec .core .url_to_fs (
67
73
path_or_handle , ** (storage_options or {})
68
74
)
69
- elif storage_options :
70
- raise ValueError ("storage_options passed with buffer or non-fsspec filepath" )
75
+ elif storage_options and (not is_url (path_or_handle ) or mode != "rb" ):
76
+ # can't write to a remote url
77
+ # without making use of fsspec at the moment
78
+ raise ValueError ("storage_options passed with buffer, or non-supported URL" )
71
79
72
80
handles = None
73
81
if (
@@ -79,7 +87,9 @@ def _get_path_or_handle(
79
87
# use get_handle only when we are very certain that it is not a directory
80
88
# fsspec resources can also point to directories
81
89
# this branch is used for example when reading from non-fsspec URLs
82
- handles = get_handle (path_or_handle , mode , is_text = False )
90
+ handles = get_handle (
91
+ path_or_handle , mode , is_text = False , storage_options = storage_options
92
+ )
83
93
fs = None
84
94
path_or_handle = handles .handle
85
95
return path_or_handle , handles , fs
@@ -307,7 +317,9 @@ def read(
307
317
# use get_handle only when we are very certain that it is not a directory
308
318
# fsspec resources can also point to directories
309
319
# this branch is used for example when reading from non-fsspec URLs
310
- handles = get_handle (path , "rb" , is_text = False )
320
+ handles = get_handle (
321
+ path , "rb" , is_text = False , storage_options = storage_options
322
+ )
311
323
path = handles .handle
312
324
parquet_file = self .api .ParquetFile (path , ** parquet_kwargs )
313
325
@@ -404,10 +416,12 @@ def to_parquet(
404
416
return None
405
417
406
418
419
+ @doc (storage_options = generic ._shared_docs ["storage_options" ])
407
420
def read_parquet (
408
421
path ,
409
422
engine : str = "auto" ,
410
423
columns = None ,
424
+ storage_options : StorageOptions = None ,
411
425
use_nullable_dtypes : bool = False ,
412
426
** kwargs ,
413
427
):
@@ -432,13 +446,18 @@ def read_parquet(
432
446
By file-like object, we refer to objects with a ``read()`` method,
433
447
such as a file handle (e.g. via builtin ``open`` function)
434
448
or ``StringIO``.
435
- engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
449
+ engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
436
450
Parquet library to use. If 'auto', then the option
437
451
``io.parquet.engine`` is used. The default ``io.parquet.engine``
438
452
behavior is to try 'pyarrow', falling back to 'fastparquet' if
439
453
'pyarrow' is unavailable.
440
454
columns : list, default=None
441
455
If not None, only these columns will be read from the file.
456
+
457
+ {storage_options}
458
+
459
+ .. versionadded:: 1.3.0
460
+
442
461
use_nullable_dtypes : bool, default False
443
462
If True, use dtypes that use ``pd.NA`` as missing value indicator
444
463
for the resulting DataFrame (only applicable for ``engine="pyarrow"``).
@@ -448,6 +467,7 @@ def read_parquet(
448
467
support dtypes) may change without notice.
449
468
450
469
.. versionadded:: 1.2.0
470
+
451
471
**kwargs
452
472
Any additional kwargs are passed to the engine.
453
473
@@ -456,6 +476,11 @@ def read_parquet(
456
476
DataFrame
457
477
"""
458
478
impl = get_engine (engine )
479
+
459
480
return impl .read (
460
- path , columns = columns , use_nullable_dtypes = use_nullable_dtypes , ** kwargs
481
+ path ,
482
+ columns = columns ,
483
+ storage_options = storage_options ,
484
+ use_nullable_dtypes = use_nullable_dtypes ,
485
+ ** kwargs ,
461
486
)
0 commit comments