From 79fa0add521a56877b9fa7c4fc1a1f107f6474ae Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 22 Jan 2021 21:27:28 +0100 Subject: [PATCH 1/2] BUG: DataFrame.__setitem__ not raising ValueError when rhs is df and has wrong number of columns --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/frame.py | 22 +++++++++++++-------- pandas/tests/frame/indexing/test_setitem.py | 17 ++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ff11ebc022ffb..2960199f2884d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -286,6 +286,7 @@ Indexing - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` with empty :class:`DataFrame` and specified columns for string indexer and non empty :class:`DataFrame` to set (:issue:`38831`) - Bug in :meth:`DataFrame.loc.__setitem__` raising ValueError when expanding unique column for :class:`DataFrame` with duplicate columns (:issue:`38521`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) +- Bug in :meth:`DataFrame.__setitem__` not raising ``ValueError`` when right hand side is a :class:`DataFrame` with wrong number of columns (:issue:`38604`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`) - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bf22ca436414a..9016062eb0262 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3223,7 +3223,7 @@ def _setitem_array(self, key, value): self._check_setitem_copy() self.iloc[indexer] = value else: - if isinstance(value, DataFrame): + if isinstance(value, DataFrame) and self.columns.is_unique: if len(value.columns) != len(key): raise ValueError("Columns must be same length as key") for k1, k2 in zip(key, value.columns): @@ -3256,14 +3256,20 @@ def _setitem_frame(self, key, value): def _set_item_frame_value(self, key, value: DataFrame) -> None: self._ensure_valid_index(value) - # align right-hand-side columns if self.columns - # is multi-index and self[key] is a sub-frame - if isinstance(self.columns, MultiIndex) and key in self.columns: + if key in self.columns: loc = self.columns.get_loc(key) - if isinstance(loc, (slice, Series, np.ndarray, Index)): - cols = maybe_droplevels(self.columns[loc], key) - if len(cols) and not cols.equals(value.columns): - value = value.reindex(cols, axis=1) + cols = self.columns[loc] + len_cols = 1 if is_scalar(cols) else len(cols) + if len_cols != len(value.columns): + raise ValueError("Columns must be same length as key") + + # align right-hand-side columns if self.columns + # is multi-index and self[key] is a sub-frame + if isinstance(self.columns, MultiIndex): + if isinstance(loc, (slice, Series, np.ndarray, Index)): + cols = maybe_droplevels(cols, key) + if len(cols) and not cols.equals(value.columns): + value = value.reindex(cols, axis=1) # now align rows value = _reindex_for_setitem(value, self.index) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 527ad666f45a2..4f8ac49cb17ec 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -366,6 +366,23 @@ def test_setitem_frame_duplicate_columns(self): ) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]]) + def test_setitem_df_wrong_column_number(self, cols): + # GH#38604 + df = DataFrame([[1, 2, 3]], columns=cols) + rhs = DataFrame([[10, 11]], columns=["d", "e"]) + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + df["a"] = rhs + + def test_setitem_listlike_indexer_duplicate_columns(self): + # GH#38604 + df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) + rhs = DataFrame([[10, 11, 12]], columns=["d", "e", "c"]) + df[["a", "b"]] = rhs + expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemWithExpansion: def test_setitem_listlike_views(self): From 4b40ef6a68c0ee5b7eab1a86ea5909f8d5bbe93d Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 23 Jan 2021 01:34:00 +0100 Subject: [PATCH 2/2] Reformat if condition --- pandas/core/frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9016062eb0262..c7585b21abe99 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3256,6 +3256,7 @@ def _setitem_frame(self, key, value): def _set_item_frame_value(self, key, value: DataFrame) -> None: self._ensure_valid_index(value) + # align columns if key in self.columns: loc = self.columns.get_loc(key) cols = self.columns[loc] @@ -3265,11 +3266,12 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: # align right-hand-side columns if self.columns # is multi-index and self[key] is a sub-frame - if isinstance(self.columns, MultiIndex): - if isinstance(loc, (slice, Series, np.ndarray, Index)): - cols = maybe_droplevels(cols, key) - if len(cols) and not cols.equals(value.columns): - value = value.reindex(cols, axis=1) + if isinstance(self.columns, MultiIndex) and isinstance( + loc, (slice, Series, np.ndarray, Index) + ): + cols = maybe_droplevels(cols, key) + if len(cols) and not cols.equals(value.columns): + value = value.reindex(cols, axis=1) # now align rows value = _reindex_for_setitem(value, self.index)