diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 567b6853bd633..0b74ea89eb4ea 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -298,6 +298,7 @@ Other enhancements - :meth:`~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`). - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`). - :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list of dict to change only some specific columns' width (:issue:`28917`). +- :meth:`DataFrame.explode` and :meth:`Series.explode` now accept ``ignore_index`` to reset the index, similar to ``pd.concat`` or ``DataFrame.sort_values`` (:issue:`34932`). .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d12ebeafe8510..44bb0c47f153d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6895,7 +6895,9 @@ def stack(self, level=-1, dropna=True): else: return stack(self, level, dropna=dropna) - def explode(self, column: Union[str, Tuple]) -> "DataFrame": + def explode( + self, column: Union[str, Tuple], ignore_index: bool = False + ) -> "DataFrame": """ Transform each element of a list-like to a row, replicating index values. @@ -6905,6 +6907,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": ---------- column : str or tuple Column to explode. + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. 
Returns ------- @@ -6961,7 +6965,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": assert df is not None # needed for mypy result = df[column].explode() result = df.drop([column], axis=1).join(result) - result.index = self.index.take(result.index) + if ignore_index: + result.index = ibase.default_index(len(result)) + else: + result.index = self.index.take(result.index) result = result.reindex(columns=self.columns, copy=False) return result diff --git a/pandas/core/series.py b/pandas/core/series.py index cab8dd133b579..79bba58f14056 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3762,12 +3762,17 @@ def reorder_levels(self, order) -> "Series": result.index = result.index.reorder_levels(order) return result - def explode(self) -> "Series": + def explode(self, ignore_index: bool = False) -> "Series": """ Transform each element of a list-like to a row. .. versionadded:: 0.25.0 + Parameters + ---------- + ignore_index : bool, default False + If True, the resulting index will be labeled 0, 1, …, n - 1. 
+ Returns ------- Series @@ -3814,9 +3819,13 @@ def explode(self) -> "Series": values, counts = reshape.explode(np.asarray(self.array)) - result = self._constructor( - values, index=self.index.repeat(counts), name=self.name - ) + if ignore_index: + result = self._constructor(values, index=range(len(values)), name=self.name) + else: + result = self._constructor( + values, index=self.index.repeat(counts), name=self.name + ) + return result def unstack(self, level=-1, fill_value=None): diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index bad8349ec977b..2bbe8ac2d5b81 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -162,3 +162,13 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index) result = df.explode("col1") expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object) tm.assert_frame_equal(result, expected) + + +def test_ignore_index(): + # GH 34932 + df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}) + result = df.explode("values", ignore_index=True) + expected = pd.DataFrame( + {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index 979199e1efc62..4b65e042f7b02 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -88,7 +88,6 @@ def test_typical_usecase(): columns=["var1", "var2"], ) exploded = df.var1.str.split(",").explode() - exploded result = df[["var2"]].join(exploded) expected = pd.DataFrame( {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")}, @@ -119,3 +118,11 @@ def test_duplicate_index(): result = s.explode() expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object) tm.assert_series_equal(result, expected) + + +def test_ignore_index(): + # GH 
34932 + s = pd.Series([[1, 2], [3, 4]]) + result = s.explode(ignore_index=True) + expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected)