diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c7585b21abe99..8791f309d67c0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -128,6 +128,7 @@
 from pandas.core.arrays.sparse import SparseFrameAccessor
 from pandas.core.construction import extract_array, sanitize_masked_array
 from pandas.core.generic import NDFrame, _shared_docs
+from pandas.core.indexers import check_key_length
 from pandas.core.indexes import base as ibase
 from pandas.core.indexes.api import (
     DatetimeIndex,
@@ -3223,9 +3224,8 @@ def _setitem_array(self, key, value):
             self._check_setitem_copy()
             self.iloc[indexer] = value
         else:
-            if isinstance(value, DataFrame) and self.columns.is_unique:
-                if len(value.columns) != len(key):
-                    raise ValueError("Columns must be same length as key")
+            if isinstance(value, DataFrame):
+                check_key_length(self.columns, key, value)
                 for k1, k2 in zip(key, value.columns):
                     self[k1] = value[k2]
             else:
diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
index 399953fc17c73..c7011b4339fe7 100644
--- a/pandas/core/indexers.py
+++ b/pandas/core/indexers.py
@@ -1,6 +1,9 @@
 """
 Low-dependency indexing utilities.
 """
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
 import warnings
 
 import numpy as np
@@ -17,6 +20,10 @@
 )
 from pandas.core.dtypes.generic import ABCIndex, ABCSeries
 
+if TYPE_CHECKING:
+    from pandas.core.frame import DataFrame
+    from pandas.core.indexes.base import Index
+
 # -----------------------------------------------------------
 # Indexer Identification
 
@@ -376,6 +383,32 @@ def unpack_1tuple(tup):
     return tup
 
 
+def check_key_length(columns: Index, key, value: DataFrame):
+    """
+    Check that the columns selected by a list-like key match the number of
+    columns of the DataFrame being assigned.
+
+    Parameters
+    ----------
+    columns : Index The columns of the DataFrame to index.
+    key : list-like The keys to index with.
+    value : DataFrame The value to set for the keys.
+
+    Raises
+    ------
+    ValueError: If ``columns`` is unique and ``len(key)`` differs from the number
+                of columns in ``value``; or if ``columns`` has duplicates and the
+                indexer for ``key`` has a different length than ``value.columns``.
+    """
+    if columns.is_unique:
+        if len(value.columns) != len(key):
+            raise ValueError("Columns must be same length as key")
+    else:
+        # One indexer entry per matched column; a missing key still counts (as -1)
+        if len(columns.get_indexer_non_unique(key)[0]) != len(value.columns):
+            raise ValueError("Columns must be same length as key")
+
+
 # -----------------------------------------------------------
 # Public indexer validation
 
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index 4f8ac49cb17ec..9318764a1b5ad 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -378,11 +378,23 @@ def test_setitem_df_wrong_column_number(self, cols):
     def test_setitem_listlike_indexer_duplicate_columns(self):
         # GH#38604
         df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
-        rhs = DataFrame([[10, 11, 12]], columns=["d", "e", "c"])
+        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
         df[["a", "b"]] = rhs
         expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
         tm.assert_frame_equal(df, expected)
 
+        df[["c", "b"]] = rhs
+        expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
+        tm.assert_frame_equal(df, expected)
+
+    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
+        # GH#39403
+        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
+        rhs = DataFrame([[10, 11]], columns=["a", "b"])
+        msg = "Columns must be same length as key"
+        with pytest.raises(ValueError, match=msg):
+            df[["a", "b"]] = rhs
+
 
 class TestDataFrameSetItemWithExpansion:
     def test_setitem_listlike_views(self):