diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04e168f1ab6ca..256a1b3146e40 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9012,7 +9012,7 @@ def explode( if len(columns) == 1: result = df[columns[0]].explode() else: - mylen = lambda x: len(x) if is_list_like(x) else -1 + mylen = lambda x: len(x) if is_list_like(x) and len(x) > 0 else 1 counts0 = self[columns[0]].apply(mylen) for c in columns[1:]: if not all(counts0 == self[c].apply(mylen)): diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 8716a181120f6..a6cfe3e994aaa 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -223,9 +223,15 @@ def test_explode_sets(): @pytest.mark.parametrize( - "input_subset, expected_dict, expected_index", + "input_dict, input_index, input_subset, expected_dict, expected_index", [ ( + { + "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], + }, + list("abcde"), list("AC"), { "A": pd.Series( @@ -239,6 +245,12 @@ def test_explode_sets(): list("aaabcdde"), ), ( + { + "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], + }, + list("abcde"), list("A"), { "A": pd.Series( @@ -260,18 +272,32 @@ def test_explode_sets(): }, list("aaabcdde"), ), + ( + { + "A": [[0, 1, 2], [], np.nan, [], (3, 4), np.nan, []], + "B": 1, + "C": [["a", "b", "c"], 2, "foo", [], ["d", "e"], np.nan, np.nan], + }, + list("abcdefg"), + list("AC"), + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, np.nan, np.nan], + index=list("aaabcdeefg"), + dtype=object, + ), + "B": 1, + "C": ["a", "b", "c", 2, "foo", np.nan, "d", "e", np.nan, np.nan], + }, + list("aaabcdeefg"), + ), ], ) -def test_multi_columns(input_subset, expected_dict, expected_index): - # GH 39240 - df = pd.DataFrame( - { - "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], - "B": 1, - "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], - }, - index=list("abcde"), - ) +def test_multi_columns( + input_dict, input_index, input_subset, expected_dict, expected_index +): + # GH 39240, 46084 + df = pd.DataFrame(input_dict, input_index) result = df.explode(input_subset) expected = pd.DataFrame(expected_dict, expected_index) tm.assert_frame_equal(result, expected)