diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 3f1ac9bb18f75..f9561c9aa4325 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -719,6 +719,7 @@ Reshaping
 - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`)
 - Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`)
 - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`)
+- Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5654ba469d05a..3b0beaa9d5373 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8848,7 +8848,7 @@ def explode(
         if len(columns) == 1:
             result = df[columns[0]].explode()
         else:
-            mylen = lambda x: len(x) if is_list_like(x) else -1
+            mylen = lambda x: len(x) if (is_list_like(x) and len(x) > 0) else 1
             counts0 = self[columns[0]].apply(mylen)
             for c in columns[1:]:
                 if not all(counts0 == self[c].apply(mylen)):
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
index 6d9874dc58c17..d1e4a603c5710 100644
--- a/pandas/tests/frame/methods/test_explode.py
+++ b/pandas/tests/frame/methods/test_explode.py
@@ -280,3 +280,24 @@ def test_multi_columns(input_subset, expected_dict, expected_index):
     result = df.explode(input_subset)
     expected = pd.DataFrame(expected_dict, expected_index)
     tm.assert_frame_equal(result, expected)
+
+
+def test_multi_columns_nan_empty():
+    # GH 46084
+    df = pd.DataFrame(
+        {
+            "A": [[0, 1], [5], [], [2, 3]],
+            "B": [9, 8, 7, 6],
+            "C": [[1, 2], np.nan, [], [3, 4]],
+        }
+    )
+    result = df.explode(["A", "C"])
+    expected = pd.DataFrame(
+        {
+            "A": np.array([0, 1, 5, np.nan, 2, 3], dtype=object),
+            "B": [9, 9, 8, 7, 6, 6],
+            "C": np.array([1, 2, np.nan, np.nan, 3, 4], dtype=object),
+        },
+        index=[0, 0, 1, 2, 3, 3],
+    )
+    tm.assert_frame_equal(result, expected)