diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a1a2149da7cf6..94167299efa90 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -968,6 +968,7 @@ Indexing - Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`) - Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) - Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) +- Bug in :meth:`DataFrame.__setitem__` casting extension array dtypes to object when setting with a scalar key and :class:`DataFrame` as value (:issue:`46896`) - Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) - Bug in :meth:`Series.loc` raising with boolean indexer containing ``NA`` when :class:`Index` did not match (:issue:`46551`) - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6cfca4ebdc612..2daf93bbebe48 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4064,13 +4064,30 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: if isinstance(self.columns, MultiIndex) and isinstance( loc, (slice, Series, np.ndarray, Index) ): - cols = maybe_droplevels(cols, key) - if len(cols) and not cols.equals(value.columns): - value = value.reindex(cols, axis=1) + cols_droplevel = maybe_droplevels(cols, key) + if len(cols_droplevel) and not cols_droplevel.equals(value.columns): + value = value.reindex(cols_droplevel, axis=1) - # now align rows - arraylike = _reindex_for_setitem(value, self.index) - self._set_item_mgr(key, arraylike) + for col, col_droplevel in zip(cols, cols_droplevel): + self[col] = value[col_droplevel] + return + + if is_scalar(cols): + self[cols] = value[value.columns[0]] + return + + # now align rows + arraylike = _reindex_for_setitem(value, self.index) + self._set_item_mgr(key, arraylike) + return + + if len(value.columns) != 1: + raise ValueError( + "Cannot set a DataFrame with multiple columns to the single " + f"column {key}" + ) + + self[key] = value[value.columns[0]] def _iset_item_mgr( self, loc: int | slice | np.ndarray, value, inplace: bool = False diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index cf4af32fc887a..c970558162707 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -164,7 +164,6 @@ def test_12499(): tm.assert_frame_equal(df, expected) -@pytest.mark.xfail(reason="Too many columns cast to float64") def test_20476(): mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) df = DataFrame(-1, index=range(3), columns=mi) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cd547819dbe94..6b19738becc8e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -91,8 +91,8 @@ def test_setitem_error_msmgs(self): # GH 4107, more descriptive error message df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) - msg = "incompatible index of inserted column with frame index" - with pytest.raises(TypeError, match=msg): + msg = "Cannot set a DataFrame with multiple columns to the single column gr" + with pytest.raises(ValueError, match=msg): df["gr"] = df.groupby(["b", "c"]).count() def test_setitem_benchmark(self): @@ -741,6 +741,18 @@ def test_setitem_rhs_dataframe(self): df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2])) tm.assert_frame_equal(df, expected) + def test_setitem_frame_overwrite_with_ea_dtype(self, any_numeric_ea_dtype): + # GH#46896 + df = DataFrame(columns=["a", "b"], data=[[1, 2], [3, 4]]) + df["a"] = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype) + expected = DataFrame( + { + "a": Series([10, 11], dtype=any_numeric_ea_dtype), + "b": [2, 4], + } + ) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture @@ -903,6 +915,19 @@ def test_frame_setitem_rangeindex_into_new_col(self): expected = DataFrame({"a": ["b"], "b": [100]}, index=[1]) tm.assert_frame_equal(result, expected) + def test_setitem_frame_keep_ea_dtype(self, any_numeric_ea_dtype): + # GH#46896 + df = DataFrame(columns=["a", "b"], data=[[1, 2], [3, 4]]) + df["c"] = DataFrame({"a": [10, 11]}, dtype=any_numeric_ea_dtype) + expected = DataFrame( + { + "a": [1, 3], + "b": [2, 4], + "c": Series([10, 11], dtype=any_numeric_ea_dtype), + } + ) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self):