Skip to content

BUG: df.__setitem__ casting ea to object when setting with scalar key and frame value #48094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 17, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,7 @@ Indexing
- Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`)
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
- Bug in :meth:`DataFrame.__setitem__` casting extension array dtypes to object when setting with a scalar key and :class:`DataFrame` as value (:issue:`46896`)
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
- Bug in :meth:`Series.loc` raising with boolean indexer containing ``NA`` when :class:`Index` did not match (:issue:`46551`)
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
Expand Down
26 changes: 20 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4064,13 +4064,27 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
if isinstance(self.columns, MultiIndex) and isinstance(
loc, (slice, Series, np.ndarray, Index)
):
cols = maybe_droplevels(cols, key)
if len(cols) and not cols.equals(value.columns):
value = value.reindex(cols, axis=1)
cols_droplevel = maybe_droplevels(cols, key)
if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
value = value.reindex(cols_droplevel, axis=1)

# now align rows
arraylike = _reindex_for_setitem(value, self.index)
self._set_item_mgr(key, arraylike)
for col, col_droplevel in zip(cols, cols_droplevel):
self[col] = value[col_droplevel]
return

if is_scalar(cols):
self[cols] = value[value.columns[0]]
return

# now align rows
arraylike = _reindex_for_setitem(value, self.index)
self._set_item_mgr(key, arraylike)
return

if len(value.columns) != 1:
raise ValueError("Expected DataFrame with 1 column.")

self[key] = value[value.columns[0]]

def _iset_item_mgr(
self, loc: int | slice | np.ndarray, value, inplace: bool = False
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/frame/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ def test_12499():
tm.assert_frame_equal(df, expected)


@pytest.mark.xfail(reason="Too many columns cast to float64")
def test_20476():
mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = DataFrame(-1, index=range(3), columns=mi)
Expand Down
29 changes: 27 additions & 2 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ def test_setitem_error_msmgs(self):
# GH 4107, more descriptive error message
df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"])

msg = "incompatible index of inserted column with frame index"
with pytest.raises(TypeError, match=msg):
msg = "Expected DataFrame with 1 column."
with pytest.raises(ValueError, match=msg):
df["gr"] = df.groupby(["b", "c"]).count()

def test_setitem_benchmark(self):
Expand Down Expand Up @@ -741,6 +741,18 @@ def test_setitem_rhs_dataframe(self):
df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2]))
tm.assert_frame_equal(df, expected)

def test_setitem_frame_overwrite_with_ea_dtype(self, any_numeric_ea_dtype):
    # GH#46896
    # Replace the existing column "a" using a one-column DataFrame whose
    # dtype is the parametrized extension dtype, then check that the
    # resulting column carries that dtype while "b" is left untouched.
    frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
    replacement = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype)
    frame["a"] = replacement

    overwritten = Series([10, 11], dtype=any_numeric_ea_dtype)
    want = DataFrame({"a": overwritten, "b": [2, 4]})
    tm.assert_frame_equal(frame, want)


class TestSetitemTZAwareValues:
@pytest.fixture
Expand Down Expand Up @@ -903,6 +915,19 @@ def test_frame_setitem_rangeindex_into_new_col(self):
expected = DataFrame({"a": ["b"], "b": [100]}, index=[1])
tm.assert_frame_equal(result, expected)

def test_setitem_frame_keep_ea_dtype(self, any_numeric_ea_dtype):
    # GH#46896
    # Add a brand-new column "c" from a one-column DataFrame whose dtype is
    # the parametrized extension dtype; the new column must keep that dtype
    # and the pre-existing columns must be unchanged.
    frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
    addition = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype)
    frame["c"] = addition

    new_column = Series([10, 11], dtype=any_numeric_ea_dtype)
    want = DataFrame({"a": [1, 3], "b": [2, 4], "c": new_column})
    tm.assert_frame_equal(frame, want)


class TestDataFrameSetItemSlicing:
def test_setitem_slice_position(self):
Expand Down