diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9fab1d12fc6a5..9930af2670716 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -780,6 +780,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) +- Bug in :meth:`DataFrame.explode` producing incorrect results for the :class:`pyarrow.large_list` type (:issue:`61091`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index e2feda495c103..9295cf7873d98 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1938,7 +1938,10 @@ def _explode(self): """ # child class explode method supports only list types; return # default implementation for non list types. 
- if not pa.types.is_list(self.dtype.pyarrow_dtype): + if not ( + pa.types.is_list(self.dtype.pyarrow_dtype) + or pa.types.is_large_list(self.dtype.pyarrow_dtype) + ): return super()._explode() values = self counts = pa.compute.list_value_length(values._pa_array) diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index 15d615fc35081..e4ad2493f9bb9 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -145,8 +145,9 @@ def test_explode_scalars_can_ignore_index(): @pytest.mark.parametrize("ignore_index", [True, False]) -def test_explode_pyarrow_list_type(ignore_index): - # GH 53602 +@pytest.mark.parametrize("list_type", ["list_", "large_list"]) +def test_explode_pyarrow_list_type(ignore_index, list_type): + # GH 53602, 61091 pa = pytest.importorskip("pyarrow") data = [ @@ -156,7 +157,7 @@ def test_explode_pyarrow_list_type(ignore_index): [2, 3], None, ] - ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + ser = pd.Series(data, dtype=pd.ArrowDtype(getattr(pa, list_type)(pa.int64()))) result = ser.explode(ignore_index=ignore_index) expected = pd.Series( data=[None, None, 1, None, 2, 3, None],