From 3e6303ff037329e4e7dc5ecee21cf243290be4d0 Mon Sep 17 00:00:00 2001 From: Azuk 443 Date: Wed, 24 May 2023 13:12:08 +0800 Subject: [PATCH 1/6] FIX: check for ndarray dimension in DataFrame.__setitem__ --- pandas/core/frame.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8afb3ee96ba94..6679d517dad51 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3966,6 +3966,24 @@ def __setitem__(self, key, value): ): # Column to set is duplicated self._setitem_array([key], value) + elif ( + isinstance(value, np.ndarray) + and value.ndim > 1 + and self.columns.is_unique + ): + # TODO: a check for MultiIndex should be added + if isinstance(self.columns, MultiIndex): + self._set_item(key, value) + return + # squeeze 2d ndarray to 1d if possible + # this keeps the backward compatability + if np.ndim(value) == 2 and (1 in np.shape(value)): + # if value is np.matrix, convert to np.ndarray + value = np.asarray(value).flatten() + self._set_item(key, value) + else: + # avoid assign non 1d array to column + raise ValueError(f"Expected a 1D array, got an array with shape {value.shape}") else: # set column self._set_item(key, value) From 6812b9dd57c1b185e999a6b6054b6ca5bd01fe76 Mon Sep 17 00:00:00 2001 From: Azuk 443 Date: Wed, 24 May 2023 16:28:38 +0800 Subject: [PATCH 2/6] FIX: check for ndarray dimension in BlockManager.insert --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2a7c0536c66a4..b5cbde6dbd65c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1401,7 +1401,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: # insert to the axis; this could possibly raise a TypeError new_axis = self.items.insert(loc, item) - if value.ndim == 2: + if value.ndim >= 2: value = value.T if len(value) > 1: 
raise ValueError( From d3f1f69c81956b7ee6371b46082c7948b6504959 Mon Sep 17 00:00:00 2001 From: Azuk 443 Date: Wed, 24 May 2023 16:29:27 +0800 Subject: [PATCH 3/6] FIX: add test for ndarray value set --- pandas/tests/frame/test_constructors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 47e307f561cf4..a011da84804b2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -283,6 +283,11 @@ def test_constructor_cast_failure(self): with pytest.raises(ValueError, match=msg): df["test"] = np.ones((4, 2)) + # this is not ok + msg = "Expected a 1D array, got an array with shape \\(4, 2, 3\\)" + with pytest.raises(ValueError, match=msg): + df["test"] = np.ones((4, 2, 3)) + # this is ok df["foo2"] = np.ones((4, 2)).tolist() From b821bd18c54d177a226e3e8b3c2bfcd2c8ed6561 Mon Sep 17 00:00:00 2001 From: Azuk 443 Date: Wed, 24 May 2023 16:36:34 +0800 Subject: [PATCH 4/6] DOC: Fixing #51925 and #53366 --- doc/source/whatsnew/v2.1.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 11533647ca124..bcd21800bcda0 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -438,6 +438,8 @@ Reshaping - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`) - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`) +- Bug in :meth:`DataFrame.__setitem__` allowing a multi-dimensional ndarray to be assigned to a single column (:issue:`51925`) +- Bug in :meth:`BlockManager.insert` allowing a multi-dimensional ndarray to be assigned to a single column (:issue:`53366`) - Sparse From 
a4d454cd8edb09a49013e9b3cbe037778054048e Mon Sep 17 00:00:00 2001 From: Azuk 443 Date: Wed, 24 May 2023 16:41:46 +0800 Subject: [PATCH 5/6] fix typo and code format --- pandas/core/frame.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6679d517dad51..59282f2e8fe12 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3967,23 +3967,23 @@ def __setitem__(self, key, value): # Column to set is duplicated self._setitem_array([key], value) elif ( - isinstance(value, np.ndarray) - and value.ndim > 1 - and self.columns.is_unique + isinstance(value, np.ndarray) and value.ndim > 1 and self.columns.is_unique ): # TODO: a check for MultiIndex should be added if isinstance(self.columns, MultiIndex): self._set_item(key, value) return # squeeze 2d ndarray to 1d if possible - # this keeps the backward compatability + # this keeps the backward compatibility if np.ndim(value) == 2 and (1 in np.shape(value)): # if value is np.matrix, convert to np.ndarray value = np.asarray(value).flatten() self._set_item(key, value) else: # avoid assign non 1d array to column - raise ValueError(f"Expected a 1D array, got an array with shape {value.shape}") + raise ValueError( + f"Expected a 1D array, got an array with shape {value.shape}" + ) else: # set column self._set_item(key, value) From c4d4ee95b6a22a33e160698ca1a2a2ca1b835b1a Mon Sep 17 00:00:00 2001 From: Azuk 443 Date: Wed, 24 May 2023 17:30:55 +0800 Subject: [PATCH 6/6] format doc --- doc/source/whatsnew/v2.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index bcd21800bcda0..36a24c93600b8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -432,14 +432,14 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`) +- Bug in 
:meth:`BlockManager.insert` allowing a multi-dimensional ndarray to be assigned to a single column (:issue:`53366`) +- Bug in :meth:`DataFrame.__setitem__` allowing a multi-dimensional ndarray to be assigned to a single column (:issue:`51925`) - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`) - Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`) - Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`) - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`) - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`) -- Bug in :meth:`DataFrame.__setitem__` allowing a multi-dimensional ndarray to be assigned to a single column (:issue:`51925`) -- Bug in :meth:`BlockManager.insert` allowing a multi-dimensional ndarray to be assigned to a single column (:issue:`53366`) - Sparse