Skip to content

Commit a8a2387

Browse files
erfannariman authored and fangchenli committed
ENH: add ignore_index option in DataFrame.explode (pandas-dev#34933)
1 parent c0470a7 commit a8a2387

File tree

5 files changed

+45
-6
lines changed

5 files changed

+45
-6
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ Other enhancements
327327
- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` for delta degrees of freedom, as in the corresponding numpy methods (:issue:`34611`).
328328
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
329329
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
330+
- :meth:`DataFrame.explode` and :meth:`Series.explode` now accept ``ignore_index`` to reset the index, similar to :func:`concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
330331

331332
.. ---------------------------------------------------------------------------
332333

pandas/core/frame.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -6939,7 +6939,9 @@ def stack(self, level=-1, dropna=True):
69396939
else:
69406940
return stack(self, level, dropna=dropna)
69416941

6942-
def explode(self, column: Union[str, Tuple]) -> "DataFrame":
6942+
def explode(
6943+
self, column: Union[str, Tuple], ignore_index: bool = False
6944+
) -> "DataFrame":
69436945
"""
69446946
Transform each element of a list-like to a row, replicating index values.
69456947
@@ -6949,6 +6951,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
69496951
----------
69506952
column : str or tuple
69516953
Column to explode.
6954+
ignore_index : bool, default False
6955+
If True, the resulting index will be labeled 0, 1, …, n - 1.
6956+
6957+
.. versionadded:: 1.1.0
69526958
69536959
Returns
69546960
-------
@@ -7005,7 +7011,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
70057011
assert df is not None # needed for mypy
70067012
result = df[column].explode()
70077013
result = df.drop([column], axis=1).join(result)
7008-
result.index = self.index.take(result.index)
7014+
if ignore_index:
7015+
result.index = ibase.default_index(len(result))
7016+
else:
7017+
result.index = self.index.take(result.index)
70097018
result = result.reindex(columns=self.columns, copy=False)
70107019

70117020
return result

pandas/core/series.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -3774,12 +3774,19 @@ def reorder_levels(self, order) -> "Series":
37743774
result.index = result.index.reorder_levels(order)
37753775
return result
37763776

3777-
def explode(self) -> "Series":
3777+
def explode(self, ignore_index: bool = False) -> "Series":
37783778
"""
37793779
Transform each element of a list-like to a row.
37803780
37813781
.. versionadded:: 0.25.0
37823782
3783+
Parameters
3784+
----------
3785+
ignore_index : bool, default False
3786+
If True, the resulting index will be labeled 0, 1, …, n - 1.
3787+
3788+
.. versionadded:: 1.1.0
3789+
37833790
Returns
37843791
-------
37853792
Series
@@ -3826,9 +3833,13 @@ def explode(self) -> "Series":
38263833

38273834
values, counts = reshape.explode(np.asarray(self.array))
38283835

3829-
result = self._constructor(
3830-
values, index=self.index.repeat(counts), name=self.name
3831-
)
3836+
if ignore_index:
3837+
index = ibase.default_index(len(values))
3838+
else:
3839+
index = self.index.repeat(counts)
3840+
3841+
result = self._constructor(values, index=index, name=self.name)
3842+
38323843
return result
38333844

38343845
def unstack(self, level=-1, fill_value=None):

pandas/tests/frame/methods/test_explode.py

+10
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,13 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index)
162162
result = df.explode("col1")
163163
expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
164164
tm.assert_frame_equal(result, expected)
165+
166+
167+
def test_ignore_index():
168+
# GH 34932: with ignore_index=True the exploded frame must be labeled 0..3 instead of repeating the source row labels.
169+
df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]})
170+
result = df.explode("values", ignore_index=True)
171+
expected = pd.DataFrame(
172+
{"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
173+
)
174+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_explode.py

+8
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,11 @@ def test_duplicate_index():
118118
result = s.explode()
119119
expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
120120
tm.assert_series_equal(result, expected)
121+
122+
123+
def test_ignore_index():
124+
# GH 34932: with ignore_index=True the exploded Series must be labeled 0..3 instead of repeating the source labels.
125+
s = pd.Series([[1, 2], [3, 4]])
126+
result = s.explode(ignore_index=True)
127+
expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
128+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)