Skip to content

ENH: allow for top and mid-level assignment to DataFrames with MultiIndex columns #7475 #36755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 50 additions & 17 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3189,10 +3189,20 @@ def _setitem_array(self, key, value):
self.iloc[indexer] = value
else:
if isinstance(value, DataFrame):
if len(value.columns) != len(key):
raise ValueError("Columns must be same length as key")
for k1, k2 in zip(key, value.columns):
self[k1] = value[k2]
columns = value.columns
if len(columns) == len(key):
for k1, k2 in zip(key, columns):
self[k1] = value[k2]
elif isinstance(columns, MultiIndex):
levels0 = columns.levels[0]
if len(levels0) == len(key):
for k1, k2 in zip(key, levels0):
self[k1] = value[k2]
else:
raise ValueError(
"Key must be same length as columns or top level of "
"MultiIndex"
)
else:
self.loc._ensure_listlike_indexer(key, axis=1, value=value)
indexer = self.loc._get_listlike_indexer(
Expand Down Expand Up @@ -3221,19 +3231,42 @@ def _setitem_frame(self, key, value):
def _set_item_frame_value(self, key, value: "DataFrame") -> None:
    """
    Assign the DataFrame ``value`` to the column key ``key``.

    Two cases are handled:

    * ``key`` already exists in ``self.columns``, or it has as many parts as
      ``self.columns`` has levels: ``value`` is set under ``key`` through
      ``self._set_item_mgr``.
    * otherwise ``key`` is treated as a partial (top- or mid-level) key: the
      column labels of ``value`` are appended to ``key`` to build full-depth
      labels, and the data is appended through ``self._mgr.append_block``.

    Parameters
    ----------
    key : label or tuple of labels
        Column key being assigned to.
    value : DataFrame
        Right-hand side of the assignment.

    Raises
    ------
    ValueError
        If ``key`` is a new partial key and the number of parts in ``key``
        plus ``value.columns.nlevels`` differs from ``self.columns.nlevels``.
    """
    self._ensure_valid_index(value)

    # standardized key info: always work with the key as a tuple of parts
    key_tup = key if isinstance(key, tuple) else (key,)
    key_len = len(key_tup)

    # evaluated once; used both to pick the branch and to decide on alignment
    key_exists = key in self.columns

    if key_exists or key_len == self.columns.nlevels:
        # align right-hand-side columns if self.columns
        # is multi-index and self[key] is a sub-frame
        if key_exists and isinstance(self.columns, MultiIndex):
            loc = self.columns.get_loc(key)
            if isinstance(loc, (slice, Series, np.ndarray, Index)):
                cols = maybe_droplevels(self.columns[loc], key)
                if len(cols) and not cols.equals(value.columns):
                    value = value.reindex(cols, axis=1)

        # now align rows
        value = _reindex_for_setitem(value, self.index)
        value = value.T
        self._set_item_mgr(key, value)
    else:
        # key parts + value's column levels must add up to the full depth
        if key_len + value.columns.nlevels != self.columns.nlevels:
            raise ValueError(
                "Must pass key/value pair that conforms with number of column "
                "levels"
            )

        # fill out keys as necessary: prefix every column label of value
        # with the key parts (labels are already tuples when value has
        # more than one column level)
        if value.columns.nlevels > 1:
            key_list = [key_tup + i for i in value.columns]
        else:
            key_list = [key_tup + (i,) for i in value.columns]
        items = MultiIndex.from_tuples(key_list)

        # align rows and append the new columns as one block
        value = _reindex_for_setitem(value, self.index)
        value = value.T
        self._mgr.append_block(items, value)

def _iset_item_mgr(self, loc: int, value) -> None:
self._mgr.iset(loc, value)
Expand Down
20 changes: 20 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,26 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
stacklevel=5,
)

def append_block(self, items, values):
    """
    Append ``values`` as a single new block placed after the existing items.

    Parameters
    ----------
    items : Index
        Labels of the new items; appended to ``self.items``.
    values : array-like
        Data handed to ``make_block`` for the new block.
        NOTE(review): presumably shaped (len(items), n_rows) - confirm
        against the caller.
    """
    base, size = len(self.items), len(items)

    new_axis = self.items.append(items)
    block = make_block(
        values=values, ndim=self.ndim, placement=slice(base, base + size)
    )

    blk_no = len(self.blocks)

    # Build the appended entries with the dtype of the existing arrays.
    # Passing ``range(size)`` / a tuple straight to ``np.append`` relies on
    # NumPy's default dtype inference, which yields float64 for an empty
    # input and would promote the whole array to float.
    blklocs = np.asarray(self.blklocs)
    self._blklocs = np.append(blklocs, np.arange(size, dtype=blklocs.dtype))
    blknos = np.asarray(self.blknos)
    self._blknos = np.append(blknos, np.full(size, blk_no, dtype=blknos.dtype))

    self.axes[0] = new_axis
    self.blocks += (block,)

    self._known_consolidated = False

    # consolidate once more than 100 blocks have accumulated
    if len(self.blocks) > 100:
        self._consolidate_inplace()

def reindex_axis(
self,
new_index,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def test_setitem_list(self, float_frame):
tm.assert_series_equal(float_frame["B"], data["A"], check_names=False)
tm.assert_series_equal(float_frame["A"], data["B"], check_names=False)

msg = "Columns must be same length as key"
msg = "Key must be same length as columns or top level of MultiIndex"
with pytest.raises(ValueError, match=msg):
data[["A"]] = float_frame[["A", "B"]]
newcolumndata = range(len(data.index) - 1)
Expand Down
100 changes: 100 additions & 0 deletions pandas/tests/indexing/multiindex/test_multiindex.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

import pandas._libs.index as _index
from pandas.errors import PerformanceWarning
Expand Down Expand Up @@ -93,3 +94,102 @@ def test_multiindex_with_datatime_level_preserves_freq(self):
result = df.loc[0].index
tm.assert_index_equal(result, dti)
assert result.freq == dti.freq

def test_multiindex_get_loc_list_raises(self):
    """``MultiIndex.get_loc`` raises ``TypeError`` when given a list key."""
    # https://github.com/pandas-dev/pandas/issues/35878
    mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
    with pytest.raises(TypeError, match="unhashable type"):
        mi.get_loc([])

def test_multiindex_frame_assign(self):
    """
    Assigning frames under new top-, mid- and leaf-level keys of a frame
    with three column levels, followed by the invalid combinations.
    """
    # leaf -> columns a, b; mid -> (x|y, a|b); top -> (q|r, x|y, a|b)
    leaf = DataFrame({"a": [0, 1, 2, 3], "b": [3, 4, 5, 6]})
    mid = pd.concat({"x": leaf, "y": leaf}, axis=1)
    top = pd.concat({"q": mid, "r": mid}, axis=1)

    # one-part key, single assignment
    result = top.copy()
    result["m"] = result["q"] + result["r"]
    expected = pd.concat({"q": mid, "r": mid, "m": 2 * mid}, axis=1)
    tm.assert_frame_equal(result, expected)

    # one-part keys, list assignment
    result = top.copy()
    result[["m", "n"]] = 2 * result[["q", "r"]]
    expected = pd.concat({"q": mid, "r": mid, "m": 2 * mid, "n": 2 * mid}, axis=1)
    tm.assert_frame_equal(result, expected)

    # two-part key, single assignment
    result = top.copy()
    result["m", "x"] = top["q", "x"] + top["q", "y"]
    new_m = pd.concat({"x": 2 * leaf}, axis=1)
    expected = pd.concat({"q": mid, "r": mid, "m": new_m}, axis=1)
    tm.assert_frame_equal(result, expected)

    # two-part keys, list assignment
    # (seems like getitem is not caught up here)
    result = top.copy()
    result[[("m", "x"), ("n", "y")]] = 2 * top["q"]
    new_m = pd.concat({"x": 2 * leaf}, axis=1)
    new_n = pd.concat({"y": 2 * leaf}, axis=1)
    expected = pd.concat({"q": mid, "r": mid, "m": new_m, "n": new_n}, axis=1)
    tm.assert_frame_equal(result, expected)

    # three-part key, single assignment
    result = top.copy()
    result["m", "x", "a"] = top["q", "x", "a"] + top["q", "x", "b"]
    new_m = pd.concat(
        {"x": pd.concat({"a": leaf["a"] + leaf["b"]}, axis=1)}, axis=1
    )
    expected = pd.concat({"q": mid, "r": mid, "m": new_m}, axis=1)
    tm.assert_frame_equal(result, expected)

    # three-part keys, list assignment
    result = top.copy()
    result[[("m", "x", "a"), ("n", "y", "b")]] = 2 * top["q", "x"]
    new_m = pd.concat({"x": pd.concat({"a": 2 * leaf["a"]}, axis=1)}, axis=1)
    new_n = pd.concat({"y": pd.concat({"b": 2 * leaf["b"]}, axis=1)}, axis=1)
    expected = pd.concat({"q": mid, "r": mid, "m": new_m, "n": new_n}, axis=1)
    tm.assert_frame_equal(result, expected)

    # invalid usage (performed on ``top`` itself, not on a copy)
    levels_msg = "Must pass key/value pair that conforms with number of column levels"
    items_msg = "Wrong number of items passed 2, placement implies 1"

    # one-part key, value has too few column levels
    with pytest.raises(ValueError, match=levels_msg):
        top["m"] = leaf

    # NOTE: a two-part key with a Series value (top["m", "x"] = leaf["a"])
    # is deliberately not asserted to raise - this appears to be desired.

    # one-part key, value has too many column levels
    with pytest.raises(ValueError, match=levels_msg):
        top["m"] = top

    # two-part key, value has too many column levels
    with pytest.raises(ValueError, match=levels_msg):
        top["m", "x"] = mid

    # three-part (full-depth) key, value has more than one column
    with pytest.raises(ValueError, match=items_msg):
        top["m", "x", "a"] = leaf