diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d7d2e3cf876ca..ff9e803b4990a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -103,7 +103,7 @@ Other enhancements - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) -- +- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) - .. _whatsnew_120.api_breaking.python: diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 5c6c15fb50fed..75dbb4b74aabd 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -124,7 +124,8 @@ def explode(ndarray[object] values): counts = np.zeros(n, dtype='int64') for i in range(n): v = values[i] - if c_is_list_like(v, False): + + if c_is_list_like(v, True): if len(v): counts[i] += len(v) else: @@ -138,8 +139,9 @@ def explode(ndarray[object] values): for i in range(n): v = values[i] - if c_is_list_like(v, False): + if c_is_list_like(v, True): if len(v): + v = list(v) for j in range(len(v)): result[count] = v[j] count += 1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29d6fb9aa7d56..150d6e24dbb86 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7091,10 +7091,11 @@ def explode( Notes ----- - This routine will explode list-likes including lists, tuples, + This routine will explode list-likes including lists, tuples, sets, Series, and np.ndarray. The result dtype of the subset rows will - be object. Scalars will be returned unchanged. Empty list-likes will - result in a np.nan for that row. + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of rows in the + output will be non-deterministic when exploding sets. Examples -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index d8fdaa2a60252..6cbd93135a2ca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3829,10 +3829,11 @@ def explode(self, ignore_index: bool = False) -> "Series": Notes ----- - This routine will explode list-likes including lists, tuples, + This routine will explode list-likes including lists, tuples, sets, Series, and np.ndarray. The result dtype of the subset rows will - be object. Scalars will be returned unchanged. Empty list-likes will - result in a np.nan for that row. + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of elements in + the output will be non-deterministic when exploding sets. Examples -------- diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 2bbe8ac2d5b81..bd0901387eeed 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -172,3 +172,11 @@ def test_ignore_index(): {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] ) tm.assert_frame_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1]) + result = df.explode(column="a").sort_values(by="a") + expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index 4b65e042f7b02..1f0fbd1cc5ecb 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -126,3 +126,11 @@ def test_ignore_index(): result = s.explode(ignore_index=True) expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object) tm.assert_series_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + s = pd.Series([{"a", "b", "c"}], index=[1]) + result = s.explode().sort_values() + expected = pd.Series(["a", "b", "c"], index=[1, 1, 1]) + tm.assert_series_equal(result, expected)