ENH: add ignore_index option in DataFrame.explode (pandas-dev#34933)

erfannariman · fangchenli · commit a8a2387e7724 · 2020-06-27T01:37:29.000-05:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -327,6 +327,7 @@ Other enhancements
 - :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ddof to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`).
 - :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
 - :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
+- :meth:`DataFrame.explode` and :meth:`Series.explode` now accept ``ignore_index`` to reset the index, similar to :func:`concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6939,7 +6939,9 @@ def stack(self, level=-1, dropna=True):
         else:
             return stack(self, level, dropna=dropna)
 
-    def explode(self, column: Union[str, Tuple]) -> "DataFrame":
+    def explode(
+        self, column: Union[str, Tuple], ignore_index: bool = False
+    ) -> "DataFrame":
         """
         Transform each element of a list-like to a row, replicating index values.
 
@@ -6949,6 +6951,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
         ----------
         column : str or tuple
             Column to explode.
+        ignore_index : bool, default False
+            If True, the resulting index will be labeled 0, 1, …, n - 1.
+
+            .. versionadded:: 1.1.0
 
         Returns
         -------
@@ -7005,7 +7011,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
         assert df is not None  # needed for mypy
         result = df[column].explode()
         result = df.drop([column], axis=1).join(result)
-        result.index = self.index.take(result.index)
+        if ignore_index:
+            result.index = ibase.default_index(len(result))
+        else:
+            result.index = self.index.take(result.index)
         result = result.reindex(columns=self.columns, copy=False)
 
         return result
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3774,12 +3774,19 @@ def reorder_levels(self, order) -> "Series":
         result.index = result.index.reorder_levels(order)
         return result
 
-    def explode(self) -> "Series":
+    def explode(self, ignore_index: bool = False) -> "Series":
         """
         Transform each element of a list-like to a row.
 
         .. versionadded:: 0.25.0
 
+        Parameters
+        ----------
+        ignore_index : bool, default False
+            If True, the resulting index will be labeled 0, 1, …, n - 1.
+
+            .. versionadded:: 1.1.0
+
         Returns
         -------
         Series
@@ -3826,9 +3833,13 @@ def explode(self) -> "Series":
 
         values, counts = reshape.explode(np.asarray(self.array))
 
-        result = self._constructor(
-            values, index=self.index.repeat(counts), name=self.name
-        )
+        if ignore_index:
+            index = ibase.default_index(len(values))
+        else:
+            index = self.index.repeat(counts)
+
+        result = self._constructor(values, index=index, name=self.name)
+
         return result
 
     def unstack(self, level=-1, fill_value=None):
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
@@ -162,3 +162,13 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index)
     result = df.explode("col1")
     expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
     tm.assert_frame_equal(result, expected)
+
+
+def test_ignore_index():
+    # GH 34932
+    df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]})
+    result = df.explode("values", ignore_index=True)
+    expected = pd.DataFrame(
+        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py
@@ -118,3 +118,11 @@ def test_duplicate_index():
     result = s.explode()
     expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
     tm.assert_series_equal(result, expected)
+
+
+def test_ignore_index():
+    # GH 34932
+    s = pd.Series([[1, 2], [3, 4]])
+    result = s.explode(ignore_index=True)
+    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
+    tm.assert_series_equal(result, expected)