From f4dc9165c2c9301d6245e964e6ef980dfc900579 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 21 Jul 2021 21:07:29 +0530 Subject: [PATCH 1/5] BUG: misleading error creating df from 2d array --- pandas/core/internals/construction.py | 2 ++ pandas/tests/frame/test_constructors.py | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 22cce5c614d5a..469381277a279 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -615,6 +615,8 @@ def _extract_index(data) -> Index: elif is_list_like(val) and getattr(val, "ndim", 1) == 1: have_raw_arrays = True raw_lengths.append(len(val)) + elif isinstance(val, np.ndarray) and getattr(val, "ndim", 1) > 1: + raise ValueError("Data must be 1-dimensional") if not indexes and not raw_lengths: raise ValueError("If using all scalar values, you must pass an index") diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c992606cc88af..9b42da0796b6d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2530,6 +2530,19 @@ def test_from_2d_object_array_of_periods_or_intervals(self): expected = DataFrame({0: pi, 1: ii, 2: pi, 3: ii}) tm.assert_frame_equal(df3, expected) + @pytest.mark.parametrize( + "col_a, col_b", + [ + ([[1], [2]], np.array([[1], [2]])), + (np.array([[1], [2]]), [[1], [2]]), + (np.array([[1], [2]]), np.array([[1], [2]])), + ], + ) + def test_error_from_2darray(self, col_a, col_b): + msg = "Data must be 1-dimensional" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": col_a, "b": col_b}) + class TestDataFrameConstructorWithDtypeCoercion: def test_floating_values_integer_dtype(self): From 8d968f26a6ff78abfd61de0d92838cdbc66f661a Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 24 Jul 2021 14:26:54 +0530 Subject: [PATCH 2/5] amended as suggested --- pandas/core/internals/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 469381277a279..96f74e4229670 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -615,7 +615,7 @@ def _extract_index(data) -> Index: elif is_list_like(val) and getattr(val, "ndim", 1) == 1: have_raw_arrays = True raw_lengths.append(len(val)) - elif isinstance(val, np.ndarray) and getattr(val, "ndim", 1) > 1: + elif isinstance(val, np.ndarray) and val.ndim > 1: raise ValueError("Data must be 1-dimensional") if not indexes and not raw_lengths: From b189f10b016076d8cad26407c86bdb45e95d1256 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Mon, 26 Jul 2021 23:59:52 +0530 Subject: [PATCH 3/5] changed error msg as suggested --- pandas/core/internals/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 96f74e4229670..a99d97770ce27 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -616,7 +616,7 @@ def _extract_index(data) -> Index: have_raw_arrays = True raw_lengths.append(len(val)) elif isinstance(val, np.ndarray) and val.ndim > 1: - raise ValueError("Data must be 1-dimensional") + raise ValueError("Per-column arrays must each be 1-dimensional") if not indexes and not raw_lengths: raise ValueError("If using all scalar values, you must pass an index") From 4bb7180c264cc7f17f3f2c2a7c388b22be920f48 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Tue, 27 Jul 2021 12:59:39 +0530 Subject: [PATCH 4/5] updated error msg --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 9b42da0796b6d..71f9544df42a3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2539,7 +2539,7 @@ def test_from_2d_object_array_of_periods_or_intervals(self): ], ) def test_error_from_2darray(self, col_a, col_b): - msg = "Data must be 1-dimensional" + msg = "Per-column arrays must each be 1-dimensional" with pytest.raises(ValueError, match=msg): DataFrame({"a": col_a, "b": col_b}) From b6381df8783e3602e27178b578b2769008ace839 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Wed, 28 Jul 2021 11:20:32 +0530 Subject: [PATCH 5/5] included whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 8d96d49daba4f..47f0ebca0dbc1 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -262,7 +262,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`) - Sparse