Skip to content

Commit 29c0bc2

Browse files
dsaxton authored and jbrockmendel committed
ENH: Make explode work for sets (pandas-dev#35637)
1 parent 0d28752 commit 29c0bc2

File tree

6 files changed

+29
-9
lines changed

6 files changed

+29
-9
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ Other enhancements
103103

104104
- Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`)
105105
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
106-
-
106+
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
107107
-
108108

109109
.. _whatsnew_120.api_breaking.python:

pandas/_libs/reshape.pyx

+4-2
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,8 @@ def explode(ndarray[object] values):
124124
counts = np.zeros(n, dtype='int64')
125125
for i in range(n):
126126
v = values[i]
127-
if c_is_list_like(v, False):
127+
128+
if c_is_list_like(v, True):
128129
if len(v):
129130
counts[i] += len(v)
130131
else:
@@ -138,8 +139,9 @@ def explode(ndarray[object] values):
138139
for i in range(n):
139140
v = values[i]
140141

141-
if c_is_list_like(v, False):
142+
if c_is_list_like(v, True):
142143
if len(v):
144+
v = list(v)
143145
for j in range(len(v)):
144146
result[count] = v[j]
145147
count += 1

pandas/core/frame.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -7091,10 +7091,11 @@ def explode(
70917091
70927092
Notes
70937093
-----
7094-
This routine will explode list-likes including lists, tuples,
7094+
This routine will explode list-likes including lists, tuples, sets,
70957095
Series, and np.ndarray. The result dtype of the subset rows will
7096-
be object. Scalars will be returned unchanged. Empty list-likes will
7097-
result in a np.nan for that row.
7096+
be object. Scalars will be returned unchanged, and empty list-likes will
7097+
result in a np.nan for that row. In addition, the ordering of rows in the
7098+
output will be non-deterministic when exploding sets.
70987099
70997100
Examples
71007101
--------

pandas/core/series.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -3829,10 +3829,11 @@ def explode(self, ignore_index: bool = False) -> "Series":
38293829
38303830
Notes
38313831
-----
3832-
This routine will explode list-likes including lists, tuples,
3832+
This routine will explode list-likes including lists, tuples, sets,
38333833
Series, and np.ndarray. The result dtype of the subset rows will
3834-
be object. Scalars will be returned unchanged. Empty list-likes will
3835-
result in a np.nan for that row.
3834+
be object. Scalars will be returned unchanged, and empty list-likes will
3835+
result in a np.nan for that row. In addition, the ordering of elements in
3836+
the output will be non-deterministic when exploding sets.
38363837
38373838
Examples
38383839
--------

pandas/tests/frame/methods/test_explode.py

+8
Original file line number | Diff line number | Diff line change
@@ -172,3 +172,11 @@ def test_ignore_index():
172172
{"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
173173
)
174174
tm.assert_frame_equal(result, expected)
175+
176+
177+
def test_explode_sets():
178+
# https://github.com/pandas-dev/pandas/issues/35614
179+
df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1])
180+
result = df.explode(column="a").sort_values(by="a")
181+
expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1])
182+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_explode.py

+8
Original file line number | Diff line number | Diff line change
@@ -126,3 +126,11 @@ def test_ignore_index():
126126
result = s.explode(ignore_index=True)
127127
expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
128128
tm.assert_series_equal(result, expected)
129+
130+
131+
def test_explode_sets():
132+
# https://github.com/pandas-dev/pandas/issues/35614
133+
s = pd.Series([{"a", "b", "c"}], index=[1])
134+
result = s.explode().sort_values()
135+
expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
136+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)