From 086a36da66292112ec66dad24a43ebd419cc5184 Mon Sep 17 00:00:00 2001 From: akashthemosh <107898164+akashthemosh@users.noreply.github.com> Date: Wed, 12 Jun 2024 03:54:11 +0000 Subject: [PATCH] Fix: Add to_pandas_kwargs to read_parquet for PyArrow engine Adds the `to_pandas_kwargs` parameter to `pd.read_parquet` to allow passing arguments to `pyarrow.Table.to_pandas`. This addresses issues that may arise during Parquet-to-DataFrame conversion, such as handling microsecond timestamps. Fixes #49236 --- pandas/io/parquet.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 306b144811898..0fb6f05b8b471 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -245,6 +245,7 @@ def read( dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, storage_options: StorageOptions | None = None, filesystem=None, + to_pandas_kwargs: dict[str, Any] | None = None, **kwargs, ) -> DataFrame: kwargs["use_pandas_metadata"] = True @@ -280,7 +281,7 @@ def read( "make_block is deprecated", DeprecationWarning, ) - result = pa_table.to_pandas(**to_pandas_kwargs) + result = pa_table.to_pandas(**(to_pandas_kwargs or {})) if pa_table.schema.metadata: if b"PANDAS_ATTRS" in pa_table.schema.metadata: @@ -505,6 +506,7 @@ def read_parquet( dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, filesystem: Any = None, filters: list[tuple] | list[list[tuple]] | None = None, + to_pandas_kwargs: dict[str, Any] | None = None, **kwargs, ) -> DataFrame: """ @@ -540,6 +542,11 @@ def read_parquet( If not None, only these columns will be read from the file. {storage_options} + to_pandas_kwargs : dict, default None + Additional keyword arguments passed to :meth:`pyarrow.Table.to_pandas` + to control how the pyarrow Table is converted to a pandas DataFrame. + This is only used when ``engine="pyarrow"``. + ..
versionadded:: 1.3.0 dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' @@ -649,5 +656,6 @@ def read_parquet( storage_options=storage_options, dtype_backend=dtype_backend, filesystem=filesystem, + to_pandas_kwargs=to_pandas_kwargs, **kwargs, )