Skip to content

Commit 573c063

Browse files
authored
BUG: frame.loc[2:, 'z'] not setting inplace when multi-block (#44345)
1 parent 2d3644c commit 573c063

File tree

6 files changed

+65
-23
lines changed

6 files changed

+65
-23
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ Indexing
575575
- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`)
576576
- Bug in :meth:`Series.__setitem__` with an integer dtype other than ``int64`` setting with a ``range`` object unnecessarily upcasting to ``int64`` (:issue:`44261`)
577577
- Bug in :meth:`Series.__setitem__` with a boolean mask indexer setting a listlike value of length 1 incorrectly broadcasting that value (:issue:`44265`)
578+
- Bug in :meth:`DataFrame.loc.__setitem__` and :meth:`DataFrame.iloc.__setitem__` with mixed dtypes sometimes failing to operate in-place (:issue:`44345`)
578579
-
579580

580581
Missing

pandas/core/indexing.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -1860,10 +1860,19 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
18601860
# in case of slice
18611861
ser = value[pi]
18621862
else:
1863-
# set the item, possibly having a dtype change
1864-
ser = ser.copy()
1865-
ser._mgr = ser._mgr.setitem(indexer=(pi,), value=value)
1866-
ser._maybe_update_cacher(clear=True, inplace=True)
1863+
# set the item, first attempting to operate inplace, then
1864+
# falling back to casting if necessary; see
1865+
# _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace
1866+
1867+
orig_values = ser._values
1868+
ser._mgr = ser._mgr.setitem((pi,), value)
1869+
1870+
if ser._values is orig_values:
1871+
# The setitem happened inplace, so the DataFrame's values
1872+
# were modified inplace.
1873+
return
1874+
self.obj._iset_item(loc, ser, inplace=True)
1875+
return
18671876

18681877
# reset the sliced object if unique
18691878
self.obj._iset_item(loc, ser, inplace=True)

pandas/io/stata.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,8 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
600600
# Cast from unsupported types to supported types
601601
is_nullable_int = isinstance(data[col].dtype, (_IntegerDtype, BooleanDtype))
602602
orig = data[col]
603+
# We need to find orig_missing before altering data below
604+
orig_missing = orig.isna()
603605
if is_nullable_int:
604606
missing_loc = data[col].isna()
605607
if missing_loc.any():
@@ -650,11 +652,10 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
650652
f"supported by Stata ({float64_max})"
651653
)
652654
if is_nullable_int:
653-
missing = orig.isna()
654-
if missing.any():
655+
if orig_missing.any():
655656
# Replace missing by Stata sentinel value
656657
sentinel = StataMissingValue.BASE_MISSING_VALUES[data[col].dtype.name]
657-
data.loc[missing, col] = sentinel
658+
data.loc[orig_missing, col] = sentinel
658659
if ws:
659660
warnings.warn(ws, PossiblePrecisionLoss)
660661

pandas/tests/frame/indexing/test_setitem.py

+28-8
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ def test_setitem_frame_length_0_str_key(self, indexer):
384384
expected["A"] = expected["A"].astype("object")
385385
tm.assert_frame_equal(df, expected)
386386

387-
def test_setitem_frame_duplicate_columns(self, using_array_manager):
387+
def test_setitem_frame_duplicate_columns(self, using_array_manager, request):
388388
# GH#15695
389389
cols = ["A", "B", "C"] * 2
390390
df = DataFrame(index=range(3), columns=cols)
@@ -407,6 +407,11 @@ def test_setitem_frame_duplicate_columns(self, using_array_manager):
407407
expected["C"] = expected["C"].astype("int64")
408408
# TODO(ArrayManager) .loc still overwrites
409409
expected["B"] = expected["B"].astype("int64")
410+
411+
mark = pytest.mark.xfail(
412+
reason="Both 'A' columns get set with 3 instead of 0 and 3"
413+
)
414+
request.node.add_marker(mark)
410415
else:
411416
# set these with unique columns to be extra-unambiguous
412417
expected[2] = expected[2].astype(np.int64)
@@ -995,22 +1000,37 @@ def test_setitem_always_copy(self, float_frame):
9951000
float_frame["E"][5:10] = np.nan
9961001
assert notna(s[5:10]).all()
9971002

998-
def test_setitem_clear_caches(self):
999-
# see GH#304
1003+
@pytest.mark.parametrize("consolidate", [True, False])
1004+
def test_setitem_partial_column_inplace(self, consolidate, using_array_manager):
1005+
# This setting should be in-place, regardless of whether frame is
1006+
# single-block or multi-block
1007+
# GH#304 this used to be incorrectly not-inplace, in which case
1008+
# we needed to ensure _item_cache was cleared.
1009+
10001010
df = DataFrame(
10011011
{"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3]
10021012
)
10031013
df.insert(2, "z", np.nan)
1014+
if not using_array_manager:
1015+
if consolidate:
1016+
df._consolidate_inplace()
1017+
assert len(df._mgr.blocks) == 1
1018+
else:
1019+
assert len(df._mgr.blocks) == 2
10041020

1005-
# cache it
1006-
foo = df["z"]
1007-
df.loc[df.index[2:], "z"] = 42
1021+
zvals = df["z"]._values
10081022

1009-
expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z")
1023+
df.loc[2:, "z"] = 42
10101024

1011-
assert df["z"] is not foo
1025+
expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z")
10121026
tm.assert_series_equal(df["z"], expected)
10131027

1028+
# check setting occurred in-place
1029+
tm.assert_numpy_array_equal(zvals, expected.values)
1030+
assert np.shares_memory(zvals, df["z"]._values)
1031+
if not consolidate:
1032+
assert df["z"]._values is zvals
1033+
10141034
def test_setitem_duplicate_columns_not_inplace(self):
10151035
# GH#39510
10161036
cols = ["A", "B"] * 2

pandas/tests/frame/indexing/test_xs.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -366,20 +366,21 @@ def test_xs_droplevel_false_view(self, using_array_manager):
366366
assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)
367367
# modifying original df also modifies result when having a single block
368368
df.iloc[0, 0] = 2
369-
if not using_array_manager:
370-
expected = DataFrame({"a": [2]})
371-
else:
372-
# TODO(ArrayManager) iloc does not update the array inplace using
373-
# "split" path
374-
expected = DataFrame({"a": [1]})
369+
expected = DataFrame({"a": [2]})
375370
tm.assert_frame_equal(result, expected)
376371

377372
# with mixed dataframe, modifying the parent doesn't modify result
378373
# TODO the "split" path behaves differently here as with single block
379374
df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
380375
result = df.xs("a", axis=1, drop_level=False)
381376
df.iloc[0, 0] = 2
382-
expected = DataFrame({"a": [1]})
377+
if using_array_manager:
378+
# Here the behavior is consistent
379+
expected = DataFrame({"a": [2]})
380+
else:
381+
# FIXME: iloc does not update the array inplace using
382+
# "split" path
383+
expected = DataFrame({"a": [1]})
383384
tm.assert_frame_equal(result, expected)
384385

385386
def test_xs_list_indexer_droplevel_false(self):

pandas/tests/frame/test_reductions.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,10 @@ def test_std_timedelta64_skipna_false(self):
789789
# GH#37392
790790
tdi = pd.timedelta_range("1 Day", periods=10)
791791
df = DataFrame({"A": tdi, "B": tdi})
792+
# Copy is needed for ArrayManager case, otherwise setting df.iloc
793+
# below edits tdi, alterting both df['A'] and df['B']
794+
# FIXME: passing copy=True to constructor does not fix this
795+
df = df.copy()
792796
df.iloc[-2, -1] = pd.NaT
793797

794798
result = df.std(skipna=False)
@@ -1017,7 +1021,9 @@ def test_idxmax_mixed_dtype(self):
10171021
# don't cast to object, which would raise in nanops
10181022
dti = date_range("2016-01-01", periods=3)
10191023

1020-
df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti})
1024+
# Copying dti is needed for ArrayManager otherwise when we set
1025+
# df.loc[0, 3] = pd.NaT below it edits dti
1026+
df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti.copy(deep=True)})
10211027

10221028
result = df.idxmax()
10231029
expected = Series([1, 0, 2], index=[1, 2, 3])
@@ -1074,6 +1080,10 @@ def test_idxmax_idxmin_convert_dtypes(self, op, expected_value):
10741080
def test_idxmax_dt64_multicolumn_axis1(self):
10751081
dti = date_range("2016-01-01", periods=3)
10761082
df = DataFrame({3: dti, 4: dti[::-1]})
1083+
# FIXME: copy needed for ArrayManager, otherwise setting with iloc
1084+
# below also sets df.iloc[-1, 1]; passing copy=True to DataFrame
1085+
# does not solve this.
1086+
df = df.copy()
10771087
df.iloc[0, 0] = pd.NaT
10781088

10791089
df._consolidate_inplace()

0 commit comments

Comments
 (0)