Skip to content

Commit 29a2c43

Browse files
committed
worked on review comments
1 parent 527a587 commit 29a2c43

File tree

2 files changed

+109
-37
lines changed

2 files changed

+109
-37
lines changed

pandas/core/frame.py

+12-6
Original file line number | Diff line number | Diff line change
@@ -7927,6 +7927,10 @@ def explode(
79277927
For multiple columns, specify a non-empty list with each element
79287928
be str or tuple, and all specified columns their list-like data
79297929
on same row of the frame must have matching length.
7930+
7931+
.. versionadded:: 1.3.0
7932+
Multi-column explode
7933+
79307934
ignore_index : bool, default False
79317935
If True, the resulting index will be labeled 0, 1, …, n - 1.
79327936
@@ -7973,6 +7977,8 @@ def explode(
79737977
2 [] 1 []
79747978
3 [3, 4] 1 [d, e]
79757979
7980+
Single-column explode.
7981+
79767982
>>> df.explode('A')
79777983
A B C
79787984
0 0 1 [a, b, c]
@@ -7983,6 +7989,8 @@ def explode(
79837989
3 3 1 [d, e]
79847990
3 4 1 [d, e]
79857991
7992+
Multi-column explode.
7993+
79867994
>>> df.explode(list('AC'))
79877995
A B C
79887996
0 0 1 a
@@ -7998,14 +8006,12 @@ def explode(
79988006

79998007
columns: list[str | tuple]
80008008
if is_scalar(column) or isinstance(column, tuple):
8001-
# mypy: List item 0 has incompatible type "Union[str, Tuple[Any, ...],
8002-
# List[Union[str, Tuple[Any, ...]]]]"; expected
8003-
# "Union[str, Tuple[Any, ...]]"
8004-
columns = [column] # type: ignore[list-item]
8009+
assert isinstance(column, (str, tuple))
8010+
columns = [column]
80058011
elif isinstance(column, list) and all(
80068012
map(lambda c: is_scalar(c) or isinstance(c, tuple), column)
80078013
):
8008-
if len(column) == 0:
8014+
if not column:
80098015
raise ValueError("column must be nonempty")
80108016
if len(column) > len(set(column)):
80118017
raise ValueError("column must be unique")
@@ -8015,7 +8021,7 @@ def explode(
80158021

80168022
df = self.reset_index(drop=True)
80178023
if len(columns) == 1:
8018-
result = df[column].explode()
8024+
result = df[columns[0]].explode()
80198025
else:
80208026
mylen = lambda x: len(x) if is_list_like(x) else -1
80218027
counts0 = self[columns[0]].apply(mylen)

pandas/tests/frame/methods/test_explode.py

+97-31
Original file line number | Diff line number | Diff line change
@@ -21,21 +21,47 @@ def test_error():
2121
with pytest.raises(ValueError, match="columns must be unique"):
2222
df.explode("A")
2323

24-
# GH 39240
25-
df1 = df.assign(C=[["a", "b", "c"], "foo", [], ["d", "e", "f"]])
26-
df1.columns = list("ABC")
27-
with pytest.raises(ValueError, match="columns must have matching element counts"):
28-
df1.explode(list("AC"))
29-
30-
# GH 39240
31-
with pytest.raises(ValueError, match="column must be nonempty"):
32-
df1.explode([])
3324

25+
@pytest.mark.parametrize(
26+
"input_dict, input_index, input_subset, error_message",
27+
[
28+
(
29+
{
30+
"A": [[0, 1, 2], np.nan, [], (3, 4)],
31+
"B": 1,
32+
"C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
33+
},
34+
list("abcd"),
35+
list("AC"),
36+
"columns must have matching element counts",
37+
),
38+
(
39+
{
40+
"A": [[0, 1, 2], np.nan, [], (3, 4)],
41+
"B": 1,
42+
"C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
43+
},
44+
list("abcd"),
45+
[],
46+
"column must be nonempty",
47+
),
48+
(
49+
{
50+
"A": [[0, 1, 2], np.nan, [], (3, 4)],
51+
"B": 1,
52+
"C": [["a", "b", "c"], "foo", [], "d"],
53+
},
54+
list("abcd"),
55+
list("AC"),
56+
"columns must have matching element counts",
57+
),
58+
],
59+
)
60+
def test_error_multi_columns(input_dict, input_index, input_subset, error_message):
3461
# GH 39240
35-
df2 = df.assign(C=[["a", "b", "c"], "foo", [], "d"])
36-
df2.columns = list("ABC")
37-
with pytest.raises(ValueError, match="columns must have matching element counts"):
38-
df2.explode(list("AC"))
62+
df = pd.DataFrame(input_dict, index=input_index)
63+
with pytest.raises(ValueError, match=error_message):
64+
df.explode(input_subset)
3965

4066

4167
def test_basic():
@@ -203,23 +229,63 @@ def test_explode_sets():
203229
tm.assert_frame_equal(result, expected)
204230

205231

206-
def test_multi_columns():
232+
@pytest.mark.parametrize(
233+
"input_dict, input_index, input_subset, expected_dict, expected_index",
234+
[
235+
(
236+
{
237+
"A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
238+
"B": 1,
239+
"C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
240+
},
241+
list("abcde"),
242+
list("AC"),
243+
{
244+
"A": pd.Series(
245+
[0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
246+
index=list("aaabcdde"),
247+
dtype=object,
248+
),
249+
"B": 1,
250+
"C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],
251+
},
252+
list("aaabcdde"),
253+
),
254+
(
255+
{
256+
"A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
257+
"B": 1,
258+
"C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
259+
},
260+
list("abcde"),
261+
list("A"),
262+
{
263+
"A": pd.Series(
264+
[0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
265+
index=list("aaabcdde"),
266+
dtype=object,
267+
),
268+
"B": 1,
269+
"C": [
270+
["a", "b", "c"],
271+
["a", "b", "c"],
272+
["a", "b", "c"],
273+
"foo",
274+
[],
275+
["d", "e"],
276+
["d", "e"],
277+
np.nan,
278+
],
279+
},
280+
list("aaabcdde"),
281+
),
282+
],
283+
)
284+
def test_multi_columns(
285+
input_dict, input_index, input_subset, expected_dict, expected_index
286+
):
207287
# GH 39240
208-
df = pd.DataFrame(
209-
{
210-
"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")),
211-
"B": 1,
212-
"C": [["a", "b", "c"], "foo", [], ["d", "e"]],
213-
}
214-
)
215-
result = df.explode(list("AC"))
216-
expected = pd.DataFrame(
217-
{
218-
"A": pd.Series(
219-
[0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
220-
),
221-
"B": 1,
222-
"C": ["a", "b", "c", "foo", np.nan, "d", "e"],
223-
}
224-
)
288+
df = pd.DataFrame(input_dict, index=input_index)
289+
result = df.explode(input_subset)
290+
expected = pd.DataFrame(expected_dict, expected_index)
225291
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)