ENH: Make explode work for sets (pandas-dev#35637)

dsaxton · jbrockmendel · commit 29c0bc2d858c · 2020-09-08T10:03:58.000-07:00
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -103,7 +103,7 @@ Other enhancements
 
 - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`)
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
--
+- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
 -
 
 .. _whatsnew_120.api_breaking.python:
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
@@ -124,7 +124,8 @@ def explode(ndarray[object] values):
     counts = np.zeros(n, dtype='int64')
     for i in range(n):
         v = values[i]
-        if c_is_list_like(v, False):
+
+        if c_is_list_like(v, True):
             if len(v):
                 counts[i] += len(v)
             else:
@@ -138,8 +139,9 @@ def explode(ndarray[object] values):
     for i in range(n):
         v = values[i]
 
-        if c_is_list_like(v, False):
+        if c_is_list_like(v, True):
             if len(v):
+                v = list(v)
                 for j in range(len(v)):
                     result[count] = v[j]
                     count += 1
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7091,10 +7091,11 @@ def explode(
 
         Notes
         -----
-        This routine will explode list-likes including lists, tuples,
+        This routine will explode list-likes including lists, tuples, sets,
         Series, and np.ndarray. The result dtype of the subset rows will
-        be object. Scalars will be returned unchanged. Empty list-likes will
-        result in a np.nan for that row.
+        be object. Scalars will be returned unchanged, and empty list-likes will
+        result in a np.nan for that row. In addition, the ordering of rows in the
+        output will be non-deterministic when exploding sets.
 
         Examples
         --------
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3829,10 +3829,11 @@ def explode(self, ignore_index: bool = False) -> "Series":
 
         Notes
         -----
-        This routine will explode list-likes including lists, tuples,
+        This routine will explode list-likes including lists, tuples, sets,
         Series, and np.ndarray. The result dtype of the subset rows will
-        be object. Scalars will be returned unchanged. Empty list-likes will
-        result in a np.nan for that row.
+        be object. Scalars will be returned unchanged, and empty list-likes will
+        result in a np.nan for that row. In addition, the ordering of elements in
+        the output will be non-deterministic when exploding sets.
 
         Examples
         --------
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
@@ -172,3 +172,11 @@ def test_ignore_index():
         {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_explode_sets():
+    # https://github.com/pandas-dev/pandas/issues/35614
+    df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1])
+    result = df.explode(column="a").sort_values(by="a")
+    expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1])
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py
@@ -126,3 +126,11 @@ def test_ignore_index():
     result = s.explode(ignore_index=True)
     expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
     tm.assert_series_equal(result, expected)
+
+
+def test_explode_sets():
+    # https://github.com/pandas-dev/pandas/issues/35614
+    s = pd.Series([{"a", "b", "c"}], index=[1])
+    result = s.explode().sort_values()
+    expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
+    tm.assert_series_equal(result, expected)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -103,7 +103,7 @@ Other enhancements` |
| 103 | 103 | |
| 104 | 104 | - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) |
| 105 | 105 | - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) |
| 106 | | `--` |
| | 106 | +- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) |
| 107 | 107 | `-` |
| 108 | 108 | |
| 109 | 109 | `.. _whatsnew_120.api_breaking.python:` |