Skip to content

Remove exploded variable #34938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ Other enhancements
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to ``pd.concat`` or ``DataFrame.sort_values`` (:issue:`34932`).

.. ---------------------------------------------------------------------------

Expand Down
11 changes: 9 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6895,7 +6895,9 @@ def stack(self, level=-1, dropna=True):
else:
return stack(self, level, dropna=dropna)

def explode(self, column: Union[str, Tuple]) -> "DataFrame":
def explode(
self, column: Union[str, Tuple], ignore_index: bool = False
) -> "DataFrame":
"""
Transform each element of a list-like to a row, replicating index values.

Expand All @@ -6905,6 +6907,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
----------
column : str or tuple
Column to explode.
ignore_index : bool, default False
If True, the resulting index will be labeled 0, 1, …, n - 1.

Returns
-------
Expand Down Expand Up @@ -6961,7 +6965,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
assert df is not None # needed for mypy
result = df[column].explode()
result = df.drop([column], axis=1).join(result)
result.index = self.index.take(result.index)
if ignore_index:
result.index = ibase.default_index(len(result))
else:
result.index = self.index.take(result.index)
result = result.reindex(columns=self.columns, copy=False)

return result
Expand Down
17 changes: 13 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3762,12 +3762,17 @@ def reorder_levels(self, order) -> "Series":
result.index = result.index.reorder_levels(order)
return result

def explode(self) -> "Series":
def explode(self, ignore_index: bool = False) -> "Series":
"""
Transform each element of a list-like to a row.

.. versionadded:: 0.25.0

Parameters
----------
ignore_index : bool, default False
If True, the resulting index will be labeled 0, 1, …, n - 1.

Returns
-------
Series
Expand Down Expand Up @@ -3814,9 +3819,13 @@ def explode(self) -> "Series":

values, counts = reshape.explode(np.asarray(self.array))

result = self._constructor(
values, index=self.index.repeat(counts), name=self.name
)
if ignore_index:
result = self._constructor(values, index=range(len(values)), name=self.name)
else:
result = self._constructor(
values, index=self.index.repeat(counts), name=self.name
)

return result

def unstack(self, level=-1, fill_value=None):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/frame/methods/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,13 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index)
result = df.explode("col1")
expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
tm.assert_frame_equal(result, expected)


def test_ignore_index():
    # GH 34932: with ignore_index=True the exploded frame must be relabeled
    # 0..n-1 instead of repeating the original row labels.
    frame = pd.DataFrame(
        {"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}
    )
    want = pd.DataFrame(
        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
    )

    exploded = frame.explode("values", ignore_index=True)

    tm.assert_frame_equal(exploded, want)
9 changes: 8 additions & 1 deletion pandas/tests/series/methods/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def test_typical_usecase():
columns=["var1", "var2"],
)
exploded = df.var1.str.split(",").explode()
exploded
result = df[["var2"]].join(exploded)
expected = pd.DataFrame(
{"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
Expand Down Expand Up @@ -119,3 +118,11 @@ def test_duplicate_index():
result = s.explode()
expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
tm.assert_series_equal(result, expected)


def test_ignore_index():
    # GH 34932: with ignore_index=True the exploded series must carry a
    # fresh 0..n-1 index rather than the repeated source labels.
    nested = pd.Series([[1, 2], [3, 4]])
    want = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)

    flattened = nested.explode(ignore_index=True)

    tm.assert_series_equal(flattened, want)