Skip to content

Commit 46d1bf7

Browse files
phofl authored and noatamir committed
BUG: df.__setitem__ casting ea to object when setting with scalar key and frame value (pandas-dev#48094)
* BUG: df.__setitem__ casting ea to object when setting with scalar key and frame value
* Change message
1 parent f16deec commit 46d1bf7

File tree

4 files changed

+51
-9
lines changed

4 files changed

+51
-9
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,7 @@ Indexing
970970
- Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`)
971971
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
972972
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
973+
- Bug in :meth:`DataFrame.__setitem__` casting extension array dtypes to object when setting with a scalar key and :class:`DataFrame` as value (:issue:`46896`)
973974
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
974975
- Bug in :meth:`Series.loc` raising with boolean indexer containing ``NA`` when :class:`Index` did not match (:issue:`46551`)
975976
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)

pandas/core/frame.py

+23-6
Original file line numberDiff line numberDiff line change
@@ -4064,13 +4064,30 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
40644064
if isinstance(self.columns, MultiIndex) and isinstance(
40654065
loc, (slice, Series, np.ndarray, Index)
40664066
):
4067-
cols = maybe_droplevels(cols, key)
4068-
if len(cols) and not cols.equals(value.columns):
4069-
value = value.reindex(cols, axis=1)
4067+
cols_droplevel = maybe_droplevels(cols, key)
4068+
if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
4069+
value = value.reindex(cols_droplevel, axis=1)
40704070

4071-
# now align rows
4072-
arraylike = _reindex_for_setitem(value, self.index)
4073-
self._set_item_mgr(key, arraylike)
4071+
for col, col_droplevel in zip(cols, cols_droplevel):
4072+
self[col] = value[col_droplevel]
4073+
return
4074+
4075+
if is_scalar(cols):
4076+
self[cols] = value[value.columns[0]]
4077+
return
4078+
4079+
# now align rows
4080+
arraylike = _reindex_for_setitem(value, self.index)
4081+
self._set_item_mgr(key, arraylike)
4082+
return
4083+
4084+
if len(value.columns) != 1:
4085+
raise ValueError(
4086+
"Cannot set a DataFrame with multiple columns to the single "
4087+
f"column {key}"
4088+
)
4089+
4090+
self[key] = value[value.columns[0]]
40744091

40754092
def _iset_item_mgr(
40764093
self, loc: int | slice | np.ndarray, value, inplace: bool = False

pandas/tests/frame/indexing/test_coercion.py

-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ def test_12499():
164164
tm.assert_frame_equal(df, expected)
165165

166166

167-
@pytest.mark.xfail(reason="Too many columns cast to float64")
168167
def test_20476():
169168
mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
170169
df = DataFrame(-1, index=range(3), columns=mi)

pandas/tests/frame/indexing/test_setitem.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ def test_setitem_error_msmgs(self):
9191
# GH 4107, more descriptive error message
9292
df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"])
9393

94-
msg = "incompatible index of inserted column with frame index"
95-
with pytest.raises(TypeError, match=msg):
94+
msg = "Cannot set a DataFrame with multiple columns to the single column gr"
95+
with pytest.raises(ValueError, match=msg):
9696
df["gr"] = df.groupby(["b", "c"]).count()
9797

9898
def test_setitem_benchmark(self):
@@ -741,6 +741,18 @@ def test_setitem_rhs_dataframe(self):
741741
df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2]))
742742
tm.assert_frame_equal(df, expected)
743743

744+
def test_setitem_frame_overwrite_with_ea_dtype(self, any_numeric_ea_dtype):
745+
# GH#46896
746+
df = DataFrame(columns=["a", "b"], data=[[1, 2], [3, 4]])
747+
df["a"] = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype)
748+
expected = DataFrame(
749+
{
750+
"a": Series([10, 11], dtype=any_numeric_ea_dtype),
751+
"b": [2, 4],
752+
}
753+
)
754+
tm.assert_frame_equal(df, expected)
755+
744756

745757
class TestSetitemTZAwareValues:
746758
@pytest.fixture
@@ -903,6 +915,19 @@ def test_frame_setitem_rangeindex_into_new_col(self):
903915
expected = DataFrame({"a": ["b"], "b": [100]}, index=[1])
904916
tm.assert_frame_equal(result, expected)
905917

918+
def test_setitem_frame_keep_ea_dtype(self, any_numeric_ea_dtype):
919+
# GH#46896
920+
df = DataFrame(columns=["a", "b"], data=[[1, 2], [3, 4]])
921+
df["c"] = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype)
922+
expected = DataFrame(
923+
{
924+
"a": [1, 3],
925+
"b": [2, 4],
926+
"c": Series([10, 11], dtype=any_numeric_ea_dtype),
927+
}
928+
)
929+
tm.assert_frame_equal(df, expected)
930+
906931

907932
class TestDataFrameSetItemSlicing:
908933
def test_setitem_slice_position(self):

0 commit comments

Comments
 (0)