From 34572272f7d6d5c7994f7e70cdc3c0b408e3e705 Mon Sep 17 00:00:00 2001 From: Emaan Hariri Date: Sat, 16 Apr 2022 10:11:56 -0700 Subject: [PATCH 1/2] change mylen function to account for empty lists --- pandas/core/frame.py | 2 +- pandas/tests/frame/methods/test_explode.py | 46 +++++++++++++++++----- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ceede5fdb5577..3a60d3cc19287 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8616,7 +8616,7 @@ def explode( if len(columns) == 1: result = df[columns[0]].explode() else: - mylen = lambda x: len(x) if is_list_like(x) else -1 + mylen = lambda x: len(x) if is_list_like(x) and len(x) else 1 counts0 = self[columns[0]].apply(mylen) for c in columns[1:]: if not all(counts0 == self[c].apply(mylen)): diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 8716a181120f6..437b6cb07fe08 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -223,9 +223,15 @@ def test_explode_sets(): @pytest.mark.parametrize( - "input_subset, expected_dict, expected_index", + "input_dict, input_index, input_subset, expected_dict, expected_index", [ ( + { + "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], + }, + list("abcde"), list("AC"), { "A": pd.Series( @@ -239,6 +245,12 @@ def test_explode_sets(): list("aaabcdde"), ), ( + { + "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], + "B": 1, + "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], + }, + list("abcde"), list("A"), { "A": pd.Series( @@ -260,18 +272,32 @@ def test_explode_sets(): }, list("aaabcdde"), ), + ( + { + "A": [[0, 1, 2], [], np.nan, [], (3, 4), np.nan, []], + "B": 1, + "C": [["a", "b", "c"], 2, "foo", [], ["d", "e"], np.nan, np.nan], + }, + list("abcdefg"), + list("AC"), + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, np.nan, np.nan], + index=list("aaabcdeefg"), + dtype=object, + ), + "B": 1, + "C": ["a", "b", "c", 2, "foo", np.nan, "d", "e", np.nan, np.nan], + }, + list("aaabcdeefg"), + ), ], ) -def test_multi_columns(input_subset, expected_dict, expected_index): +def test_multi_columns( + input_dict, input_index, input_subset, expected_dict, expected_index +): # GH 39240 - df = pd.DataFrame( - { - "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan], - "B": 1, - "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan], - }, - index=list("abcde"), - ) + df = pd.DataFrame(input_dict, input_index) result = df.explode(input_subset) expected = pd.DataFrame(expected_dict, expected_index) tm.assert_frame_equal(result, expected) From eed608c2e5d8d7a2e277b2b4546b8f1144a881f5 Mon Sep 17 00:00:00 2001 From: Emaan Hariri Date: Thu, 21 Apr 2022 10:46:27 -0700 Subject: [PATCH 2/2] Clean up mylen and add github issue # to test --- pandas/core/frame.py | 2 +- pandas/tests/frame/methods/test_explode.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a60d3cc19287..b4a5e93aec958 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8616,7 +8616,7 @@ def explode( if len(columns) == 1: result = df[columns[0]].explode() else: - mylen = lambda x: len(x) if is_list_like(x) and len(x) else 1 + mylen = lambda x: len(x) if is_list_like(x) and len(x) > 0 else 1 counts0 = self[columns[0]].apply(mylen) for c in columns[1:]: if not all(counts0 == self[c].apply(mylen)): diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 437b6cb07fe08..a6cfe3e994aaa 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -296,7 +296,7 @@ def test_explode_sets(): def test_multi_columns( input_dict, input_index, input_subset, expected_dict, expected_index ): - # GH 39240 + # GH 39240, 46084 df = pd.DataFrame(input_dict, input_index) result = df.explode(input_subset) expected = pd.DataFrame(expected_dict, expected_index)