Skip to content

Commit 8d13866

Browse files
committed
worked on review comments
1 parent 6353525 commit 8d13866

File tree

2 files changed

+87
-33
lines changed

2 files changed

+87
-33
lines changed

pandas/core/frame.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -8185,6 +8185,10 @@ def explode(
81858185
For multiple columns, specify a non-empty list with each element
81868186
being a str or tuple; the list-like data of all specified columns
81878187
on the same row of the frame must have matching length.
8188+
8189+
.. versionadded:: 1.3.0
8190+
Multi-column explode
8191+
81888192
ignore_index : bool, default False
81898193
If True, the resulting index will be labeled 0, 1, …, n - 1.
81908194
@@ -8231,6 +8235,8 @@ def explode(
82318235
2 [] 1 []
82328236
3 [3, 4] 1 [d, e]
82338237
8238+
Single-column explode.
8239+
82348240
>>> df.explode('A')
82358241
A B C
82368242
0 0 1 [a, b, c]
@@ -8241,6 +8247,8 @@ def explode(
82418247
3 3 1 [d, e]
82428248
3 4 1 [d, e]
82438249
8250+
Multi-column explode.
8251+
82448252
>>> df.explode(list('AC'))
82458253
A B C
82468254
0 0 1 a
@@ -8256,14 +8264,12 @@ def explode(
82568264

82578265
columns: list[str | tuple]
82588266
if is_scalar(column) or isinstance(column, tuple):
8259-
# mypy: List item 0 has incompatible type "Union[str, Tuple[Any, ...],
8260-
# List[Union[str, Tuple[Any, ...]]]]"; expected
8261-
# "Union[str, Tuple[Any, ...]]"
8262-
columns = [column] # type: ignore[list-item]
8267+
assert isinstance(column, (str, tuple))
8268+
columns = [column]
82638269
elif isinstance(column, list) and all(
82648270
map(lambda c: is_scalar(c) or isinstance(c, tuple), column)
82658271
):
8266-
if len(column) == 0:
8272+
if not column:
82678273
raise ValueError("column must be nonempty")
82688274
if len(column) > len(set(column)):
82698275
raise ValueError("column must be unique")
@@ -8273,7 +8279,7 @@ def explode(
82738279

82748280
df = self.reset_index(drop=True)
82758281
if len(columns) == 1:
8276-
result = df[column].explode()
8282+
result = df[columns[0]].explode()
82778283
else:
82788284
mylen = lambda x: len(x) if is_list_like(x) else -1
82798285
counts0 = self[columns[0]].apply(mylen)

pandas/tests/frame/methods/test_explode.py

+75-27
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,36 @@ def test_error():
2121
with pytest.raises(ValueError, match="columns must be unique"):
2222
df.explode("A")
2323

24-
# GH 39240
25-
df1 = df.assign(C=[["a", "b", "c"], "foo", [], ["d", "e", "f"]])
26-
df1.columns = list("ABC")
27-
with pytest.raises(ValueError, match="columns must have matching element counts"):
28-
df1.explode(list("AC"))
29-
30-
# GH 39240
31-
with pytest.raises(ValueError, match="column must be nonempty"):
32-
df1.explode([])
3324

25+
@pytest.mark.parametrize(
26+
"input_subset, error_message",
27+
[
28+
(
29+
list("AC"),
30+
"columns must have matching element counts",
31+
),
32+
(
33+
[],
34+
"column must be nonempty",
35+
),
36+
(
37+
list("AC"),
38+
"columns must have matching element counts",
39+
),
40+
],
41+
)
42+
def test_error_multi_columns(input_subset, error_message):
3443
# GH 39240
35-
df2 = df.assign(C=[["a", "b", "c"], "foo", [], "d"])
36-
df2.columns = list("ABC")
37-
with pytest.raises(ValueError, match="columns must have matching element counts"):
38-
df2.explode(list("AC"))
44+
df = pd.DataFrame(
45+
{
46+
"A": [[0, 1, 2], np.nan, [], (3, 4)],
47+
"B": 1,
48+
"C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
49+
},
50+
index=list("abcd"),
51+
)
52+
with pytest.raises(ValueError, match=error_message):
53+
df.explode(input_subset)
3954

4055

4156
def test_basic():
@@ -203,23 +218,56 @@ def test_explode_sets():
203218
tm.assert_frame_equal(result, expected)
204219

205220

206-
def test_multi_columns():
221+
@pytest.mark.parametrize(
222+
"input_subset, expected_dict, expected_index",
223+
[
224+
(
225+
list("AC"),
226+
{
227+
"A": pd.Series(
228+
[0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
229+
index=list("aaabcdde"),
230+
dtype=object,
231+
),
232+
"B": 1,
233+
"C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],
234+
},
235+
list("aaabcdde"),
236+
),
237+
(
238+
list("A"),
239+
{
240+
"A": pd.Series(
241+
[0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
242+
index=list("aaabcdde"),
243+
dtype=object,
244+
),
245+
"B": 1,
246+
"C": [
247+
["a", "b", "c"],
248+
["a", "b", "c"],
249+
["a", "b", "c"],
250+
"foo",
251+
[],
252+
["d", "e"],
253+
["d", "e"],
254+
np.nan,
255+
],
256+
},
257+
list("aaabcdde"),
258+
),
259+
],
260+
)
261+
def test_multi_columns(input_subset, expected_dict, expected_index):
207262
# GH 39240
208263
df = pd.DataFrame(
209264
{
210-
"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")),
265+
"A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
211266
"B": 1,
212-
"C": [["a", "b", "c"], "foo", [], ["d", "e"]],
213-
}
214-
)
215-
result = df.explode(list("AC"))
216-
expected = pd.DataFrame(
217-
{
218-
"A": pd.Series(
219-
[0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
220-
),
221-
"B": 1,
222-
"C": ["a", "b", "c", "foo", np.nan, "d", "e"],
223-
}
267+
"C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
268+
},
269+
index=list("abcde"),
224270
)
271+
result = df.explode(input_subset)
272+
expected = pd.DataFrame(expected_dict, expected_index)
225273
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)