Skip to content

BUG: loc.setitem coercing rhs df dtypes #50475

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,7 @@ Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
- Bug in :meth:`DataFrame.loc` coercing values to a single dtype when setting a :class:`DataFrame` with different dtypes (:issue:`50467`)
- Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
- Bug in :meth:`Series.loc` raising error for out of bounds end of slice indexer (:issue:`50161`)
- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
Expand Down
16 changes: 10 additions & 6 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,10 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
# maybe partial set
take_split_path = not self.obj._mgr.is_single_block

if not take_split_path and isinstance(value, ABCDataFrame):
# Avoid cast of values
take_split_path = not value._mgr.is_single_block

# if there is only one block/type, still have to take split path
# unless the block is one-dimensional or it can hold the value
if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
Expand Down Expand Up @@ -2055,7 +2059,7 @@ def _setitem_single_block(self, indexer, value, name: str) -> None:
value = self._align_series(indexer, Series(value))

elif isinstance(value, ABCDataFrame) and name != "iloc":
value = self._align_frame(indexer, value)
value = self._align_frame(indexer, value)._values

# check for chained assignment
self.obj._check_is_chained_assignment_possible()
Expand Down Expand Up @@ -2291,7 +2295,7 @@ def ravel(i):

raise ValueError("Incompatible indexer with Series")

def _align_frame(self, indexer, df: DataFrame):
def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
is_frame = self.ndim == 2

if isinstance(indexer, tuple):
Expand All @@ -2315,15 +2319,15 @@ def _align_frame(self, indexer, df: DataFrame):
if idx is not None and cols is not None:

if df.index.equals(idx) and df.columns.equals(cols):
val = df.copy()._values
val = df.copy()
else:
val = df.reindex(idx, columns=cols)._values
val = df.reindex(idx, columns=cols)
return val

elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
ax = self.obj.index[indexer]
if df.index.equals(ax):
val = df.copy()._values
val = df.copy()
else:

# we have a multi-index and are trying to align
Expand All @@ -2338,7 +2342,7 @@ def _align_frame(self, indexer, df: DataFrame):
"specifying the join levels"
)

val = df.reindex(index=ax)._values
val = df.reindex(index=ax)
return val

raise ValueError("Incompatible indexer with DataFrame")
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1494,6 +1494,15 @@ def test_loc_datetime_assignment_dtype_does_not_change(self, utc, indexer):

tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("indexer, idx", [(tm.loc, 1), (tm.iloc, 2)])
def test_setitem_value_coercing_dtypes(self, indexer, idx):
    """Setting a mixed-dtype frame must keep each value's own type."""
    # GH#50467
    target = DataFrame(
        [["1", np.nan], ["2", np.nan], ["3", np.nan]], dtype=object
    )
    replacement = DataFrame([[1, np.nan], [2, np.nan]])

    # ``idx`` differs per indexer so that both address the first two rows:
    # loc slices include the end label, iloc slices exclude the end position.
    indexer(target)[:idx, :] = replacement

    # The integers from ``replacement`` must stay integers rather than being
    # cast to a common dtype with the NaN column; row 2 is left untouched.
    expected = DataFrame(
        [[1, np.nan], [2, np.nan], ["3", np.nan]], dtype=object
    )
    tm.assert_frame_equal(target, expected)


class TestDataFrameIndexingUInt64:
def test_setitem(self, uint64_frame):
Expand Down