Skip to content

Commit 086a36d

Browse files
committed
Fix: Add to_pandas_kwargs to read_parquet for PyArrow engine
Adds the `to_pandas_kwargs` parameter to `pd.read_parquet` so that callers can forward keyword arguments to `pyarrow.Table.to_pandas` when the PyArrow engine is used. This gives control over the Parquet-to-DataFrame conversion step and addresses issues that may arise there, such as how microsecond timestamps are handled. Fixes pandas-dev#49236
1 parent bbe0e53 commit 086a36d

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

pandas/io/parquet.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def read(
245245
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
246246
storage_options: StorageOptions | None = None,
247247
filesystem=None,
248+
to_pandas_kwargs: dict[str, Any] | None = None,
248249
**kwargs,
249250
) -> DataFrame:
250251
kwargs["use_pandas_metadata"] = True
@@ -280,7 +281,7 @@ def read(
280281
"make_block is deprecated",
281282
DeprecationWarning,
282283
)
283-
result = pa_table.to_pandas(**to_pandas_kwargs)
284+
result = pa_table.to_pandas(**(to_pandas_kwargs or {}))
284285

285286
if pa_table.schema.metadata:
286287
if b"PANDAS_ATTRS" in pa_table.schema.metadata:
@@ -505,6 +506,7 @@ def read_parquet(
505506
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
506507
filesystem: Any = None,
507508
filters: list[tuple] | list[list[tuple]] | None = None,
509+
to_pandas_kwargs: dict[str, Any] | None = None,
508510
**kwargs,
509511
) -> DataFrame:
510512
"""
@@ -540,6 +542,11 @@ def read_parquet(
540542
If not None, only these columns will be read from the file.
541543
{storage_options}
542544
545+
to_pandas_kwargs : dict, default None
546+
Additional keyword arguments passed to :meth:`pyarrow.Table.to_pandas`
547+
to control how the pyarrow Table is converted to a pandas DataFrame.
548+
This is only used when ``engine="pyarrow"``.
549+
543550
.. versionadded:: 1.3.0
544551
545552
dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
@@ -649,5 +656,6 @@ def read_parquet(
649656
storage_options=storage_options,
650657
dtype_backend=dtype_backend,
651658
filesystem=filesystem,
659+
to_pandas_kwargs=to_pandas_kwargs,
652660
**kwargs,
653661
)

0 commit comments

Comments
 (0)