From 96136efdfb4b25a89c1ff5ffbe9d124212002360 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanjain@microsoft.com>
Date: Thu, 25 Apr 2024 20:06:23 -0400
Subject: [PATCH 1/7] preserve index in list accessor

---
 doc/source/whatsnew/v3.0.0.rst                |  1 +
 pandas/core/arrays/arrow/accessors.py         | 21 +++++++++---------
 .../series/accessors/test_list_accessor.py    | 22 ++++++++++++++++---
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7263329d2e53b..62e1125398279 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -289,6 +289,7 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
+- Fixed bug in :class:`ListAccessor` not preserving index. (:issue:`58425`)
 - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
 - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index 19ec253e81ef2..c3c641a34e5d3 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -110,7 +110,9 @@ def len(self) -> Series:
         from pandas import Series
 
         value_lengths = pc.list_value_length(self._pa_array)
-        return Series(value_lengths, dtype=ArrowDtype(value_lengths.type))
+        return Series(
+            value_lengths, dtype=ArrowDtype(value_lengths.type), index=self._data.index
+        )
 
     def __getitem__(self, key: int | slice) -> Series:
         """
@@ -149,7 +151,9 @@ def __getitem__(self, key: int | slice) -> Series:
             # if key < 0:
             #     key = pc.add(key, pc.list_value_length(self._pa_array))
             element = pc.list_element(self._pa_array, key)
-            return Series(element, dtype=ArrowDtype(element.type))
+            return Series(
+                element, dtype=ArrowDtype(element.type), index=self._data.index
+            )
         elif isinstance(key, slice):
             if pa_version_under11p0:
                 raise NotImplementedError(
@@ -167,7 +171,7 @@ def __getitem__(self, key: int | slice) -> Series:
             if step is None:
                 step = 1
             sliced = pc.list_slice(self._pa_array, start, stop, step)
-            return Series(sliced, dtype=ArrowDtype(sliced.type))
+            return Series(sliced, dtype=ArrowDtype(sliced.type), index=self._data.index)
         else:
             raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
 
@@ -195,15 +199,12 @@ def flatten(self) -> Series:
         ... )
         >>> s.list.flatten()
         0    1
-        1    2
-        2    3
-        3    3
+        0    2
+        0    3
+        1    3
         dtype: int64[pyarrow]
         """
-        from pandas import Series
-
-        flattened = pc.list_flatten(self._pa_array)
-        return Series(flattened, dtype=ArrowDtype(flattened.type))
+        return self._data.dropna().explode()
 
 
 class StructAccessor(ArrowAccessor):
diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 1c60567c1a530..2d2eeac3d3c1b 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -31,10 +31,22 @@ def test_list_getitem(list_dtype):
     tm.assert_series_equal(actual, expected)
 
 
+def test_list_getitem_index():
+    ser = Series(
+        [[1, 2, 3], [4, None, 5], None],
+        dtype=ArrowDtype(pa.list_(pa.int64())),
+        index=[1, 3, 7],
+    )
+    actual = ser.list[1]
+    expected = Series([2, None, None], dtype="int64[pyarrow]", index=[1, 3, 7])
+    tm.assert_series_equal(actual, expected)
+
+
 def test_list_getitem_slice():
     ser = Series(
         [[1, 2, 3], [4, None, 5], None],
         dtype=ArrowDtype(pa.list_(pa.int64())),
+        index=[1, 3, 7],
     )
     if pa_version_under11p0:
         with pytest.raises(
@@ -44,7 +56,9 @@ def test_list_getitem_slice():
     else:
         actual = ser.list[1:None:None]
         expected = Series(
-            [[2, 3], [None, 5], None], dtype=ArrowDtype(pa.list_(pa.int64()))
+            [[2, 3], [None, 5], None],
+            dtype=ArrowDtype(pa.list_(pa.int64())),
+            index=[1, 3, 7],
         )
         tm.assert_series_equal(actual, expected)
 
@@ -61,11 +75,13 @@ def test_list_len():
 
 def test_list_flatten():
     ser = Series(
-        [[1, 2, 3], [4, None], None],
+        [[1, 2, 3], None, [4, None]],
         dtype=ArrowDtype(pa.list_(pa.int64())),
     )
     actual = ser.list.flatten()
-    expected = Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
+    expected = Series(
+        [1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()), index=[0, 0, 0, 2, 2]
+    )
     tm.assert_series_equal(actual, expected)
 
 

From 2af7fdb2491895da4f6e4aaaeaa49e1330aa8cfe Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanjain@microsoft.com>
Date: Thu, 25 Apr 2024 20:11:10 -0400
Subject: [PATCH 2/7] gh reference

---
 pandas/tests/series/accessors/test_list_accessor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 2d2eeac3d3c1b..620df45dac36c 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -32,6 +32,7 @@ def test_list_getitem(list_dtype):
 
 
 def test_list_getitem_index():
+    # GH 58425
     ser = Series(
         [[1, 2, 3], [4, None, 5], None],
         dtype=ArrowDtype(pa.list_(pa.int64())),

From 9c2f6f9517f6abbfaf3c4734127fa8701fbab27f Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanjain@microsoft.com>
Date: Thu, 25 Apr 2024 20:42:39 -0400
Subject: [PATCH 3/7] flatten: replace explode with list_flatten and a repeated index

---
 pandas/core/arrays/arrow/accessors.py               | 7 ++++++-
 pandas/tests/series/accessors/test_list_accessor.py | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index c3c641a34e5d3..802648e4f3261 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -204,7 +204,12 @@ def flatten(self) -> Series:
         1    3
         dtype: int64[pyarrow]
         """
-        return self._data.dropna().explode()
+        from pandas import Series
+
+        counts = pa.compute.list_value_length(self._pa_array).fill_null(0)
+        flattened = pa.compute.list_flatten(self._pa_array)
+        index = self._data.index.repeat(counts)
+        return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
 
 
 class StructAccessor(ArrowAccessor):
diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 620df45dac36c..2b5119eb12009 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -76,7 +76,7 @@ def test_list_len():
 
 def test_list_flatten():
     ser = Series(
-        [[1, 2, 3], None, [4, None]],
+        [[1, 2, 3], None, [4, None], []],
         dtype=ArrowDtype(pa.list_(pa.int64())),
     )
     actual = ser.list.flatten()

From 92698056e2fcd0f97987ffe262e2e69cda0e7a4f Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanjain@microsoft.com>
Date: Thu, 25 Apr 2024 20:45:06 -0400
Subject: [PATCH 4/7] cleanup

---
 pandas/core/arrays/arrow/accessors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index 802648e4f3261..d8f948a37d206 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -206,9 +206,9 @@ def flatten(self) -> Series:
         """
         from pandas import Series
 
-        counts = pa.compute.list_value_length(self._pa_array).fill_null(0)
+        counts = pa.compute.list_value_length(self._pa_array)
         flattened = pa.compute.list_flatten(self._pa_array)
-        index = self._data.index.repeat(counts)
+        index = self._data.index.repeat(counts.fill_null(pa.scalar(0, counts.type)))
         return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
 
 

From 38f66b68e5f594c14a1a4aa2ab45bbe051f9f5f9 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanjain@microsoft.com>
Date: Thu, 25 Apr 2024 20:49:16 -0400
Subject: [PATCH 5/7] improve test

---
 pandas/tests/series/accessors/test_list_accessor.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 2b5119eb12009..c153e800cb534 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -76,12 +76,14 @@ def test_list_len():
 
 def test_list_flatten():
     ser = Series(
-        [[1, 2, 3], None, [4, None], []],
+        [[1, 2, 3], None, [4, None], [], [7, 8]],
         dtype=ArrowDtype(pa.list_(pa.int64())),
     )
     actual = ser.list.flatten()
     expected = Series(
-        [1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()), index=[0, 0, 0, 2, 2]
+        [1, 2, 3, 4, None, 7, 8],
+        dtype=ArrowDtype(pa.int64()),
+        index=[0, 0, 0, 2, 2, 4, 4],
     )
     tm.assert_series_equal(actual, expected)
 

From 46b3e5d608dc539fc98904734987e79de381b4ac Mon Sep 17 00:00:00 2001
From: rohanjain101 <38412262+rohanjain101@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:11:38 -0400
Subject: [PATCH 6/7] Update v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 633f5b3b99e21..c8ee17161d3e7 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -465,7 +465,7 @@ Styler
 Other
 ^^^^^
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
-- Bug in :class:`ListAccessor` not preserving index. (:issue:`58425`)
+- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)

From 4ac5f3f80840fd90c890208373cc0d214f7c1226 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanjain@microsoft.com>
Date: Mon, 29 Apr 2024 17:56:07 -0400
Subject: [PATCH 7/7] Move Series.list whatsnew entry to end of Other section

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index b99232213e6bb..5eec18cd6e5b0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -471,7 +471,6 @@ Other
 ^^^^^
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
-- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
@@ -483,6 +482,7 @@ Other
 - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
 - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
 - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
+- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 
 .. ***DO NOT USE THIS SECTION***