From 8924277fa3dbe775f46e679ab8bd97b293e465ea Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Wed, 28 Jul 2021 18:07:39 +0530 Subject: [PATCH] BUG: misleading error creating df from 2d array (#42646) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/internals/construction.py | 2 ++ pandas/tests/frame/test_constructors.py | 13 +++++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 63644384cad85..432dd46000eb3 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -265,6 +265,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ +- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`) - :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`) - Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`) - diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index f568ecf7deb9d..7f3d246a6fda6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -615,6 +615,8 @@ def _extract_index(data) -> Index: elif is_list_like(val) and getattr(val, "ndim", 1) == 1: have_raw_arrays = True raw_lengths.append(len(val)) + elif isinstance(val, np.ndarray) and val.ndim > 1: + raise ValueError("Per-column arrays must each be 1-dimensional") if not indexes and not raw_lengths: raise ValueError("If using all scalar values, you must pass an index") diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c992606cc88af..71f9544df42a3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2530,6 +2530,19 @@ def test_from_2d_object_array_of_periods_or_intervals(self): expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii}) tm.assert_frame_equal(df3, expected) + @pytest.mark.parametrize( + "col_a, col_b", + [ + ([[1], [2]], np.array([[1], [2]])), + (np.array([[1], [2]]), [[1], [2]]), + (np.array([[1], [2]]), np.array([[1], [2]])), + ], + ) + def test_error_from_2darray(self, col_a, col_b): + msg = "Per-column arrays must each be 1-dimensional" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": col_a, "b": col_b}) + class TestDataFrameConstructorWithDtypeCoercion: def test_floating_values_integer_dtype(self):