@@ -245,24 +245,25 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True

-        to_pandas_kwargs = {}
+        to_pandas_kwargs = to_pandas_kwargs or {}
         if dtype_backend == "numpy_nullable":
             from pandas.io._util import _arrow_dtype_mapping

             mapping = _arrow_dtype_mapping()
             to_pandas_kwargs["types_mapper"] = mapping.get
         elif dtype_backend == "pyarrow":
-            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]
+            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype
         elif using_pyarrow_string_dtype():
             to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

         manager = _get_option("mode.data_manager", silent=True)
         if manager == "array":
-            to_pandas_kwargs["split_blocks"] = True  # type: ignore[assignment]
+            to_pandas_kwargs["split_blocks"] = True

         path_or_handle, handles, filesystem = _get_path_or_handle(
             path,
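In the pyarrow path above, the caller-supplied dict is the starting point and the dtype_backend branches then overwrite "types_mapper" on top of it, so an explicit dtype_backend takes precedence over a user-supplied mapper (and the caller's dict is mutated in place). A minimal standalone sketch of that ordering, not pandas code: resolve_to_pandas_kwargs is a hypothetical helper and the string "pd.ArrowDtype" only stands in for the real mapper.

    # Hypothetical helper mirroring the merge order implemented in the hunk above.
    def resolve_to_pandas_kwargs(to_pandas_kwargs=None, dtype_backend=None):
        to_pandas_kwargs = to_pandas_kwargs or {}  # caller's dict is the base
        if dtype_backend == "pyarrow":
            # overwrites any user-supplied "types_mapper"
            to_pandas_kwargs["types_mapper"] = "pd.ArrowDtype"
        return to_pandas_kwargs

    print(resolve_to_pandas_kwargs({"types_mapper": "custom", "self_destruct": True}, "pyarrow"))
    # {'types_mapper': 'pd.ArrowDtype', 'self_destruct': True}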
@@ -362,9 +363,11 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}
+        to_pandas_kwargs = to_pandas_kwargs or {}
         use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
         dtype_backend = kwargs.pop("dtype_backend", lib.no_default)
         # We are disabling nullable dtypes for fastparquet pending discussion
@@ -400,7 +403,7 @@ def read(

         try:
             parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
-            return parquet_file.to_pandas(columns=columns, filters=filters, **kwargs)
+            return parquet_file.to_pandas(columns=columns, filters=filters, **to_pandas_kwargs, **kwargs)
         finally:
             if handles is not None:
                 handles.close()
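On the fastparquet path, to_pandas_kwargs is unpacked into ParquetFile.to_pandas alongside the remaining **kwargs, so passing the same option through both raises a TypeError for a repeated keyword. A minimal standalone illustration of that call mechanics (the option name "index" is only illustrative):

    # Stand-in for the engine's to_pandas; only the double-unpack behaviour matters here.
    def to_pandas(columns=None, filters=None, **options):
        return options

    to_pandas_kwargs = {"index": "a"}
    kwargs = {"index": "a"}
    try:
        to_pandas(columns=None, filters=None, **to_pandas_kwargs, **kwargs)
    except TypeError as exc:
        print(exc)  # got multiple values for keyword argument 'index'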
@@ -465,7 +468,7 @@ def to_parquet(
         .. versionadded:: 2.1.0

     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.

     Returns
     -------
@@ -505,6 +508,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict[str, Any] | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -588,6 +592,9 @@ def read_parquet(

         .. versionadded:: 2.1.0

+    to_pandas_kwargs : dict[str, Any], default None
+        Dictionary of arguments passed to the underlying engine's ``to_pandas`` function.
+
     **kwargs
         Any additional kwargs are passed to the engine.

@@ -676,5 +683,6 @@ def read_parquet(
         use_nullable_dtypes=use_nullable_dtypes,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )
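Taken together, the change lets callers forward conversion options through read_parquet to the engine's to_pandas step. A minimal usage sketch under the pyarrow engine; the file name is illustrative, and strings_to_categorical is an existing pyarrow.Table.to_pandas option:

    import pandas as pd

    # Round-trip a small frame, then ask pyarrow to build categoricals from strings
    # during the Arrow-to-pandas conversion step.
    pd.DataFrame({"a": ["x", "y", "x"]}).to_parquet("example.parquet", engine="pyarrow")

    result = pd.read_parquet(
        "example.parquet",
        engine="pyarrow",
        to_pandas_kwargs={"strings_to_categorical": True},
    )
    print(result["a"].dtype)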