Commit 19c541b

fix: Expose to_pandas_kwargs in read_parquet for pyarrow engine
1 parent 3c96b8f

2 files changed: +20 −5 lines

doc/source/user_guide/io.rst (+7)

@@ -5420,6 +5420,13 @@ Read only certain columns of a parquet file.
     )
     result.dtypes
 
+Pass additional keyword arguments to the underlying engine's ``to_pandas`` function.
+
+.. ipython:: python
+
+    result = pd.read_parquet("example_pa.parquet", engine="pyarrow", to_pandas_kwargs={"timestamp_as_object": True})
+    result.dtypes
+
 
 .. ipython:: python
    :suppress:
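
For context, here is a minimal, self-contained sketch of the behavior the new documentation example describes. It assumes a pandas build that includes this change; the column name and sample values are placeholders, not part of the commit. The relevant fact is that pyarrow's Table.to_pandas accepts timestamp_as_object, which converts timestamp columns to Python datetime objects (object dtype) rather than datetime64.

    # Sketch only: exercises the documented option on a throwaway file.
    import pandas as pd

    df = pd.DataFrame({"ts": pd.to_datetime(["2015-01-01", "2016-06-15"])})
    df.to_parquet("example_pa.parquet", engine="pyarrow")

    # Default conversion keeps the timestamp column as a datetime64 dtype.
    print(pd.read_parquet("example_pa.parquet", engine="pyarrow").dtypes)

    # timestamp_as_object=True is forwarded to pyarrow's Table.to_pandas and
    # yields Python datetime objects (object dtype) instead.
    result = pd.read_parquet(
        "example_pa.parquet",
        engine="pyarrow",
        to_pandas_kwargs={"timestamp_as_object": True},
    )
    print(result.dtypes)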

pandas/io/parquet.py (+13 −5)

@@ -245,24 +245,25 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
 
-        to_pandas_kwargs = {}
+        to_pandas_kwargs = to_pandas_kwargs or {}
         if dtype_backend == "numpy_nullable":
             from pandas.io._util import _arrow_dtype_mapping
 
             mapping = _arrow_dtype_mapping()
             to_pandas_kwargs["types_mapper"] = mapping.get
         elif dtype_backend == "pyarrow":
-            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]
+            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype
         elif using_pyarrow_string_dtype():
             to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
 
         manager = _get_option("mode.data_manager", silent=True)
         if manager == "array":
-            to_pandas_kwargs["split_blocks"] = True  # type: ignore[assignment]
+            to_pandas_kwargs["split_blocks"] = True
 
         path_or_handle, handles, filesystem = _get_path_or_handle(
             path,
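
A standalone sketch of the defaulting logic in the hunk above, assuming only pandas and pyarrow are installed; convert and the sample table are illustrative names, not part of the pandas codebase. Note that, as written in the hunk, the engine still assigns types_mapper and split_blocks into the same dict after the or-default, so those internally chosen values take precedence over user-supplied keys of the same name.

    # Illustrative only: mirrors how a caller-supplied dict is defaulted and
    # then forwarded to pyarrow's Table.to_pandas.
    from datetime import datetime

    import pandas as pd
    import pyarrow as pa


    def convert(table: pa.Table, to_pandas_kwargs: dict | None = None) -> pd.DataFrame:
        # Same defaulting as the hunk: fall back to an empty dict so later
        # engine-level assignments (types_mapper, split_blocks) can be made
        # unconditionally on the same mapping.
        to_pandas_kwargs = to_pandas_kwargs or {}
        return table.to_pandas(**to_pandas_kwargs)


    table = pa.table({"ts": [datetime(2012, 1, 1)]})
    print(convert(table).dtypes)                                 # datetime64 dtype
    print(convert(table, {"timestamp_as_object": True}).dtypes)  # object dtype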
@@ -362,9 +363,11 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}
+        to_pandas_kwargs = to_pandas_kwargs or {}
         use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
         dtype_backend = kwargs.pop("dtype_backend", lib.no_default)
         # We are disabling nullable dtypes for fastparquet pending discussion
@@ -400,7 +403,7 @@ def read(
 
         try:
             parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
-            return parquet_file.to_pandas(columns=columns, filters=filters, **kwargs)
+            return parquet_file.to_pandas(columns=columns, filters=filters, **to_pandas_kwargs, **kwargs)
         finally:
             if handles is not None:
                 handles.close()
@@ -465,7 +468,7 @@ def to_parquet(
         .. versionadded:: 2.1.0
 
     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.
 
     Returns
     -------
@@ -505,6 +508,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict[str, Any] | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -588,6 +592,9 @@ def read_parquet(
 
         .. versionadded:: 2.1.0
 
+    to_pandas_kwargs : dict[str, Any], default None
+        Dictionary of arguments passed to the underlying engine's ``to_pandas`` function.
+
     **kwargs
         Any additional kwargs are passed to the engine.
 
@@ -676,5 +683,6 @@ def read_parquet(
         use_nullable_dtypes=use_nullable_dtypes,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )
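
To make the wiring above concrete, a rough sketch of what the new keyword ultimately maps to for the pyarrow engine. This is an approximation, not pandas code: it ignores the use_pandas_metadata, dtype_backend, and filesystem handling that pandas layers on top, and the path and variable names are placeholders.

    # Approximate equivalence for the pyarrow engine (sketch only).
    import pandas as pd
    import pyarrow.parquet as pq

    path = "example_pa.parquet"  # placeholder path

    # New public keyword added by this commit:
    via_pandas = pd.read_parquet(
        path, engine="pyarrow", to_pandas_kwargs={"timestamp_as_object": True}
    )

    # Roughly what gets forwarded underneath:
    via_pyarrow = pq.read_table(path).to_pandas(timestamp_as_object=True)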
