From 80e07536a661375553275176ed96c0c66d71eadc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 8 May 2023 22:09:13 +0200 Subject: [PATCH 1/3] BUG: Setting frame into df with dup cols loses dtypes --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/frame.py | 11 ++++++++--- pandas/tests/frame/indexing/test_setitem.py | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 258c14cec7925..2d505cd7a2ed1 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -361,7 +361,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`) - Missing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 56c58bc9347e0..03a7ef1420ced 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4101,9 +4101,14 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: self[cols] = value[value.columns[0]] return - # now align rows - arraylike, _ = _reindex_for_setitem(value, self.index) - self._set_item_mgr(key, arraylike) + if isinstance(loc, slice): + locs = np.arange(loc.start, loc.stop, loc.step) + elif is_scalar(loc): + locs = [loc] + else: + locs = loc.nonzero()[0] + for idx, loc in enumerate(locs): + self.isetitem(loc, value.iloc[:, idx]) return if len(value.columns) != 1: diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index a51955548232b..af3632bffe948 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1283,3 +1283,19 @@ def test_setitem_iloc_with_numpy_array(self, dtype): expected = DataFrame({"a": [2, 1, 1]}, dtype=dtype) tm.assert_frame_equal(df, expected) + + def test_setitem_frame_dup_cols_dtype(self): + # GH#53143 + df = DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=["a", "b", "a", "c"]) + rhs = DataFrame([[0, 1.5], [2, 2.5]], columns=["a", "a"]) + df["a"] = rhs + expected = DataFrame( + [[0, 2, 1.5, 4], [2, 5, 2.5, 7]], columns=["a", "b", "a", "c"] + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + rhs = DataFrame([[0, 1.5], [2, 2.5]], columns=["a", "a"]) + df["a"] = rhs + expected = DataFrame([[0, 1.5, 3], [2, 2.5, 6]], columns=["a", "a", "b"]) + tm.assert_frame_equal(df, expected) From fd487300ed963b57975da6f74d6f593784de8b02 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 8 May 2023 22:11:00 +0200 Subject: [PATCH 2/3] BUG: Setting frame into df with dup cols loses dtypes --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03a7ef1420ced..783e0f404951e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4107,9 +4107,8 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: locs = [loc] else: locs = loc.nonzero()[0] - for idx, loc in enumerate(locs): - self.isetitem(loc, value.iloc[:, idx]) - return + + return self.isetitem(locs, value) if len(value.columns) != 1: raise ValueError( From b4dc748ffdce3f3bd6c6b48c8f9a20ffd19d8fae Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 8 May 2023 23:52:03 +0200 Subject: [PATCH 3/3] Fix mypy --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 783e0f404951e..485cc9db5ffe7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4101,6 +4101,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: self[cols] = value[value.columns[0]] return + locs: np.ndarray | list if isinstance(loc, slice): locs = np.arange(loc.start, loc.stop, loc.step) elif is_scalar(loc):