diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f2e833dfe7790..73b5554918da4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3189,10 +3189,19 @@ def _setitem_array(self, key, value):
             self.iloc[indexer] = value
         else:
             if isinstance(value, DataFrame):
-                if len(value.columns) != len(key):
-                    raise ValueError("Columns must be same length as key")
-                for k1, k2 in zip(key, value.columns):
-                    self[k1] = value[k2]
+                columns = value.columns
+                if len(columns) != len(key) and isinstance(columns, MultiIndex):
+                    # Pair keys with the top-level labels actually present, in
+                    # order of appearance. ``columns.levels[0]`` is sorted and
+                    # may keep labels that a prior selection left unused.
+                    columns = columns.get_level_values(0).unique()
+                if len(columns) != len(key):
+                    raise ValueError(
+                        "Key must be same length as columns or top level of "
+                        "MultiIndex"
+                    )
+                for k1, k2 in zip(key, columns):
+                    self[k1] = value[k2]
             else:
                 self.loc._ensure_listlike_indexer(key, axis=1, value=value)
                 indexer = self.loc._get_listlike_indexer(
@@ -3221,19 +3230,56 @@ def _setitem_frame(self, key, value):
     def _set_item_frame_value(self, key, value: "DataFrame") -> None:
+        """
+        Set the DataFrame ``value`` under the column key ``key``.
+
+        If ``key`` is already in the columns, or has as many levels as the
+        columns, ``value`` is written through ``_set_item_mgr``. Otherwise
+        ``key`` is used as a prefix: each column label of ``value`` is appended
+        to it and the resulting columns are added as one new block.
+
+        Raises
+        ------
+        ValueError
+            If ``key`` is a new prefix and its length plus the number of column
+            levels in ``value`` differs from the number of column levels.
+        """
         self._ensure_valid_index(value)
 
-        # align right-hand-side columns if self.columns
-        # is multi-index and self[key] is a sub-frame
-        if isinstance(self.columns, MultiIndex) and key in self.columns:
-            loc = self.columns.get_loc(key)
-            if isinstance(loc, (slice, Series, np.ndarray, Index)):
-                cols = maybe_droplevels(self.columns[loc], key)
-                if len(cols) and not cols.equals(value.columns):
-                    value = value.reindex(cols, axis=1)
-
-        # now align rows
-        value = _reindex_for_setitem(value, self.index)
-        value = value.T
-        self._set_item_mgr(key, value)
+        # standardized key info
+        key_tup = key if isinstance(key, tuple) else (key,)
+        key_len = len(key_tup)
+
+        if key in self.columns or key_len == self.columns.nlevels:
+            # align right-hand-side columns if self.columns
+            # is multi-index and self[key] is a sub-frame
+            if isinstance(self.columns, MultiIndex) and key in self.columns:
+                loc = self.columns.get_loc(key)
+                if isinstance(loc, (slice, Series, np.ndarray, Index)):
+                    cols = maybe_droplevels(self.columns[loc], key)
+                    if len(cols) and not cols.equals(value.columns):
+                        value = value.reindex(cols, axis=1)
+
+            # now align rows
+            value = _reindex_for_setitem(value, self.index)
+            value = value.T
+            self._set_item_mgr(key, value)
+        else:
+            if key_len + value.columns.nlevels != self.columns.nlevels:
+                raise ValueError(
+                    "Must pass key/value pair that conforms with number of column "
+                    "levels"
+                )
+
+            # fill out keys as necessary
+            if value.columns.nlevels > 1:
+                key_list = [key_tup + i for i in value.columns]
+            else:
+                key_list = [key_tup + (i,) for i in value.columns]
+            items = MultiIndex.from_tuples(key_list)
+
+            # align and append block
+            value = _reindex_for_setitem(value, self.index)
+            value = value.T
+            self._mgr.append_block(items, value)
 
     def _iset_item_mgr(self, loc: int, value) -> None:
         self._mgr.iset(loc, value)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index e939c43015aed..fd2e5f2ee0b7b 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1232,6 +1232,41 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
                 stacklevel=5,
             )
 
+    def append_block(self, items, values):
+        """
+        Append ``values`` as one new block whose items are ``items``.
+
+        ``items`` is appended to the item axis, the block is placed after all
+        existing items, and ``blknos``/``blklocs`` are extended to cover it.
+        Consolidation is triggered once more than 100 blocks are held.
+
+        Parameters
+        ----------
+        items : Index
+            Labels for the new items.
+        values : array-like
+            Data for the new block; assumed to hold one row per label in
+            ``items`` (TODO confirm against ``make_block``).
+        """
+        base, size = len(self.items), len(items)
+
+        new_axis = self.items.append(items)
+        block = make_block(
+            values=values, ndim=self.ndim, placement=slice(base, base + size)
+        )
+
+        blk_no = len(self.blocks)
+        self._blklocs = np.append(self.blklocs, range(size))
+        self._blknos = np.append(self.blknos, size * (blk_no,))
+
+        self.axes[0] = new_axis
+        self.blocks += (block,)
+
+        self._known_consolidated = False
+
+        if len(self.blocks) > 100:
+            self._consolidate_inplace()
+
     def reindex_axis(
         self,
         new_index,
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 49eb570c4ffe0..93f4d25e6bab5 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -119,7 +119,7 @@ def test_setitem_list(self, float_frame):
         tm.assert_series_equal(float_frame["B"], data["A"], check_names=False)
         tm.assert_series_equal(float_frame["A"], data["B"], check_names=False)
 
-        msg = "Columns must be same length as key"
+        msg = "Key must be same length as columns or top level of MultiIndex"
         with pytest.raises(ValueError, match=msg):
             data[["A"]] = float_frame[["A", "B"]]
         newcolumndata = range(len(data.index) - 1)
diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
index 9a3039c28416c..013ab7c36d724 100644
--- a/pandas/tests/indexing/multiindex/test_multiindex.py
+++ b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas._libs.index as _index
 from pandas.errors import PerformanceWarning
@@ -93,3 +94,123 @@ def test_multiindex_with_datatime_level_preserves_freq(self):
         result = df.loc[0].index
         tm.assert_index_equal(result, dti)
         assert result.freq == dti.freq
+
+    def test_multiindex_get_loc_list_raises(self):
+        # https://github.com/pandas-dev/pandas/issues/35878
+        idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
+        msg = "unhashable type"
+        with pytest.raises(TypeError, match=msg):
+            idx.get_loc([])
+
+    def test_multiindex_frame_assign(self):
+        df0 = DataFrame({"a": [0, 1, 2, 3], "b": [3, 4, 5, 6]})
+        df1 = pd.concat({"x": df0, "y": df0}, axis=1)
+        df2 = pd.concat({"q": df1, "r": df1}, axis=1)
+
+        # level one assign
+        result = df2.copy()
+        result["m"] = result["q"] + result["r"]
+        expected = pd.concat({"q": df1, "r": df1, "m": 2 * df1}, axis=1)
+        tm.assert_frame_equal(result, expected)
+
+        # level one assign - multiple
+        result = df2.copy()
+        result[["m", "n"]] = 2 * result[["q", "r"]]
+        expected = pd.concat({"q": df1, "r": df1, "m": 2 * df1, "n": 2 * df1}, axis=1)
+        tm.assert_frame_equal(result, expected)
+
+        # level two assign
+        result = df2.copy()
+        result["m", "x"] = df2["q", "x"] + df2["q", "y"]
+        expected = pd.concat(
+            {"q": df1, "r": df1, "m": pd.concat({"x": 2 * df0}, axis=1)}, axis=1
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # level two assign - multiple (seems like getitem is not caught up here)
+        result = df2.copy()
+        result[[("m", "x"), ("n", "y")]] = 2 * df2["q"]
+        expected = pd.concat(
+            {
+                "q": df1,
+                "r": df1,
+                "m": pd.concat({"x": 2 * df0}, axis=1),
+                "n": pd.concat({"y": 2 * df0}, axis=1),
+            },
+            axis=1,
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # level three assign
+        result = df2.copy()
+        result["m", "x", "a"] = df2["q", "x", "a"] + df2["q", "x", "b"]
+        expected = pd.concat(
+            {
+                "q": df1,
+                "r": df1,
+                "m": pd.concat(
+                    {"x": pd.concat({"a": df0["a"] + df0["b"]}, axis=1)}, axis=1
+                ),
+            },
+            axis=1,
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # level three assign - multiple
+        result = df2.copy()
+        result[[("m", "x", "a"), ("n", "y", "b")]] = 2 * df2["q", "x"]
+        expected = pd.concat(
+            {
+                "q": df1,
+                "r": df1,
+                "m": pd.concat({"x": pd.concat({"a": 2 * df0["a"]}, axis=1)}, axis=1),
+                "n": pd.concat({"y": pd.concat({"b": 2 * df0["b"]}, axis=1)}, axis=1),
+            },
+            axis=1,
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # invalid usage
+        msg = "Must pass key/value pair that conforms with number of column levels"
+        msg2 = "Wrong number of items passed 2, placement implies 1"
+
+        # too few levels at level one
+        with pytest.raises(ValueError, match=msg):
+            df2["m"] = df0
+
+        # too few levels at level two (df2["m", "x"] = df0["a"]) is not asserted
+        # to raise; accepting it appears to be the desired behavior
+
+        # too many levels at level one
+        with pytest.raises(ValueError, match=msg):
+            df2["m"] = df2
+
+        # too many levels at level two
+        with pytest.raises(ValueError, match=msg):
+            df2["m", "x"] = df1
+
+        # too many levels at level three
+        with pytest.raises(ValueError, match=msg2):
+            df2["m", "x", "a"] = df0
+
+    def test_multiindex_frame_assign_top_level_labels(self):
+        # keys pair with the top-level labels present in the value's columns,
+        # in order of appearance
+        df0 = DataFrame({"a": [0, 1, 2, 3], "b": [3, 4, 5, 6]})
+        df1 = pd.concat({"x": df0, "y": df0}, axis=1)
+        df2 = pd.concat({"q": df1, "r": 3 * df1}, axis=1)
+
+        result = df2.copy()
+        result[["m", "n"]] = df2[["r", "q"]]
+        expected = pd.concat({"q": df1, "r": 3 * df1, "m": 3 * df1, "n": df1}, axis=1)
+        tm.assert_frame_equal(result, expected)
+
+        msg = "Key must be same length as columns or top level of MultiIndex"
+
+        # unused top-level labels left by a selection do not count
+        with pytest.raises(ValueError, match=msg):
+            df2[["m", "n"]] = df2[["q"]]
+
+        # neither the columns nor the top level match the key length
+        with pytest.raises(ValueError, match=msg):
+            df2[["m", "n", "o"]] = df2[["q", "r"]]