Skip to content

Commit 2117367

Browse files
BUG: Fix read_parquet not working with data type pyarrow list (#57411)
1 parent 9cd5e55 commit 2117367

File tree

2 files changed

+16
-0
lines changed

2 files changed

+16
-0
lines changed

pandas/core/dtypes/common.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1623,6 +1623,8 @@ def pandas_dtype(dtype) -> DtypeObj:
16231623
return dtype.dtype
16241624
elif isinstance(dtype, (np.dtype, ExtensionDtype)):
16251625
return dtype
1626+
elif "list" in str(dtype) and "pyarrow" in str(dtype):
1627+
return dtype
16261628

16271629
# registered extension types
16281630
result = registry.find(dtype)

pandas/tests/io/test_parquet.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,20 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp):
348348
tm.assert_frame_equal(result, df[["a", "d"]])
349349

350350

351+
def test_pyarrow_list():
    """Round-trip a DataFrame with a pyarrow ``list<int64>`` column via parquet.

    The frame is written with ``DataFrame.to_parquet`` and read back with
    ``read_parquet(dtype_backend="pyarrow")``; the result must compare equal
    to the frame that was written.
    """
    import os
    import tempfile

    # NOTE(review): the body only uses pyarrow directly; the fastparquet
    # requirement is preserved from the original test -- confirm it is intended.
    pytest.importorskip("fastparquet")
    # Skip (instead of failing with ImportError) when pyarrow is unavailable.
    pa = pytest.importorskip("pyarrow")

    list_int = pa.list_(pa.int64())
    s = pd.Series([[1, 1], [2, 2]], dtype=pd.ArrowDtype(list_int))

    df = pd.DataFrame(s, columns=["col"])

    # Write into a throwaway directory so the test does not leave an
    # "ex.parquet" file behind in the current working directory and cannot
    # collide with other tests running in parallel.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "ex.parquet")
        df.to_parquet(path)

        result = read_parquet(path=path, dtype_backend="pyarrow")
        tm.assert_frame_equal(df, result)
363+
364+
351365
class Base:
352366
def check_error_on_write(self, df, engine, exc, err_msg):
353367
# check that we are raising the exception on writing

0 commit comments

Comments
 (0)