Skip to content

Commit 4e4be0b

Browse files
authored
DEPR: enforce inplaceness for df.loc[:, foo]=bar (pandas-dev#49775)
* DEPR: enforce inplaceness for df.loc[:, foo]=bar * Fix ArrayManager tests * suggested edits to AM tests * update doctest * CoW test * whatsnew * Use reindex_indexer * suggested test edits
1 parent 4bd55a4 commit 4e4be0b

26 files changed

+197
-334
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ Removal of prior version deprecations/changes
716716
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
717717
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
718718
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
719+
- Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``; these now always attempt to set values inplace before falling back to casting (:issue:`45333`)
719720
- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
720721
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
721722
- Using the method :meth:`DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`)

pandas/core/dtypes/cast.py

+4
Original file line numberDiff line numberDiff line change
@@ -1821,6 +1821,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
18211821
return element
18221822
raise LossySetitemError
18231823

1824+
if dtype.kind == "V":
1825+
# i.e. np.void, which cannot hold _anything_
1826+
raise LossySetitemError
1827+
18241828
raise NotImplementedError(dtype)
18251829

18261830

pandas/core/frame.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -4076,7 +4076,7 @@ def _set_value(
40764076
else:
40774077
icol = self.columns.get_loc(col)
40784078
iindex = self.index.get_loc(index)
4079-
self._mgr.column_setitem(icol, iindex, value, inplace=True)
4079+
self._mgr.column_setitem(icol, iindex, value, inplace_only=True)
40804080
self._clear_item_cache()
40814081

40824082
except (KeyError, TypeError, ValueError, LossySetitemError):
@@ -7969,10 +7969,10 @@ def update(
79697969
>>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
79707970
>>> df.update(new_df)
79717971
>>> df
7972-
A B
7973-
0 1 4.0
7974-
1 2 500.0
7975-
2 3 6.0
7972+
A B
7973+
0 1 4
7974+
1 2 500
7975+
2 3 6
79767976
"""
79777977
from pandas.core.computation import expressions
79787978

@@ -8010,9 +8010,7 @@ def update(
80108010
if mask.all():
80118011
continue
80128012

8013-
with warnings.catch_warnings():
8014-
warnings.filterwarnings("ignore", "In a future version, `df.iloc")
8015-
self.loc[:, col] = expressions.where(mask, this, that)
8013+
self.loc[:, col] = expressions.where(mask, this, that)
80168014

80178015
# ----------------------------------------------------------------------
80188016
# Data reshaping

pandas/core/indexing.py

+33-63
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
cast,
1010
final,
1111
)
12-
import warnings
1312

1413
import numpy as np
1514

@@ -23,9 +22,9 @@
2322
AbstractMethodError,
2423
IndexingError,
2524
InvalidIndexError,
25+
LossySetitemError,
2626
)
2727
from pandas.util._decorators import doc
28-
from pandas.util._exceptions import find_stack_level
2928

3029
from pandas.core.dtypes.cast import (
3130
can_hold_element,
@@ -793,6 +792,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
793792
if self.ndim != 2:
794793
return
795794

795+
orig_key = key
796796
if isinstance(key, tuple) and len(key) > 1:
797797
# key may be a tuple if we are .loc
798798
# if length of key is > 1 set key to column part
@@ -808,6 +808,23 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
808808
):
809809
# GH#38148
810810
keys = self.obj.columns.union(key, sort=False)
811+
diff = Index(key).difference(self.obj.columns, sort=False)
812+
813+
if len(diff) and com.is_null_slice(orig_key[0]):
814+
# e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
815+
# is a new column, add the new columns with dtype=np.void
816+
# so that later when we go through setitem_single_column
817+
# we will use isetitem. Without this, the reindex_axis
818+
# below would create float64 columns in this example, which
819+
# would successfully hold 7, so we would end up with the wrong
820+
# dtype.
821+
indexer = np.arange(len(keys), dtype=np.intp)
822+
indexer[len(self.obj.columns) :] = -1
823+
new_mgr = self.obj._mgr.reindex_indexer(
824+
keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
825+
)
826+
self.obj._mgr = new_mgr
827+
return
811828

812829
self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
813830

@@ -1984,72 +2001,25 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
19842001
"""
19852002
pi = plane_indexer
19862003

1987-
orig_values = self.obj._get_column_array(loc)
1988-
1989-
# perform the equivalent of a setitem on the info axis
1990-
# as we have a null slice or a slice with full bounds
1991-
# which means essentially reassign to the columns of a
1992-
# multi-dim object
1993-
# GH#6149 (null slice), GH#10408 (full bounds)
1994-
if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
1995-
pass
1996-
elif (
1997-
is_array_like(value)
1998-
and len(value.shape) > 0
1999-
and self.obj.shape[0] == value.shape[0]
2000-
and not is_empty_indexer(pi)
2001-
):
2002-
if is_list_like(pi) and not is_bool_dtype(pi):
2003-
value = value[np.argsort(pi)]
2004-
else:
2005-
# in case of slice
2006-
value = value[pi]
2004+
is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))
2005+
2006+
if is_full_setter:
2007+
2008+
try:
2009+
self.obj._mgr.column_setitem(
2010+
loc, plane_indexer, value, inplace_only=True
2011+
)
2012+
except (ValueError, TypeError, LossySetitemError):
2013+
# If we're setting an entire column and we can't do it inplace,
2014+
# then we can use value's dtype (or inferred dtype)
2015+
# instead of object
2016+
self.obj.isetitem(loc, value)
20072017
else:
20082018
# set value into the column (first attempting to operate inplace, then
20092019
# falling back to casting if necessary)
20102020
self.obj._mgr.column_setitem(loc, plane_indexer, value)
2011-
self.obj._clear_item_cache()
2012-
return
2013-
2014-
self.obj._iset_item(loc, value)
20152021

2016-
# We will not operate in-place, but will attempt to in the future.
2017-
# To determine whether we need to issue a FutureWarning, see if the
2018-
# setting in-place would work, i.e. behavior will change.
2019-
2020-
new_values = self.obj._get_column_array(loc)
2021-
2022-
if can_hold_element(orig_values, new_values) and not len(new_values) == 0:
2023-
# Don't issue the warning yet, as we can still trim a few cases where
2024-
# behavior will not change.
2025-
2026-
if (
2027-
isinstance(new_values, np.ndarray)
2028-
and isinstance(orig_values, np.ndarray)
2029-
and (
2030-
np.shares_memory(new_values, orig_values)
2031-
or new_values.shape != orig_values.shape
2032-
)
2033-
):
2034-
# TODO: get something like tm.shares_memory working?
2035-
# The values were set inplace after all, no need to warn,
2036-
# e.g. test_rename_nocopy
2037-
# In case of enlarging we can not set inplace, so no need to
2038-
# warn either
2039-
pass
2040-
else:
2041-
warnings.warn(
2042-
"In a future version, `df.iloc[:, i] = newvals` will attempt "
2043-
"to set the values inplace instead of always setting a new "
2044-
"array. To retain the old behavior, use either "
2045-
"`df[df.columns[i]] = newvals` or, if columns are non-unique, "
2046-
"`df.isetitem(i, newvals)`",
2047-
FutureWarning,
2048-
stacklevel=find_stack_level(),
2049-
)
2050-
# TODO: how to get future behavior?
2051-
# TODO: what if we got here indirectly via loc?
2052-
return
2022+
self.obj._clear_item_cache()
20532023

20542024
def _setitem_single_block(self, indexer, value, name: str) -> None:
20552025
"""

pandas/core/internals/array_manager.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -856,19 +856,21 @@ def iset(
856856
return
857857

858858
def column_setitem(
859-
self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False
859+
self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False
860860
) -> None:
861861
"""
862862
Set values ("setitem") into a single column (not setting the full column).
863863
864864
This is a method on the ArrayManager level, to avoid creating an
865865
intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
866+
867+
866868
"""
867869
if not is_integer(loc):
868870
raise TypeError("The column index should be an integer")
869871
arr = self.arrays[loc]
870872
mgr = SingleArrayManager([arr], [self._axes[0]])
871-
if inplace:
873+
if inplace_only:
872874
mgr.setitem_inplace(idx, value)
873875
else:
874876
new_mgr = mgr.setitem((idx,), value)

pandas/core/internals/managers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1336,7 +1336,7 @@ def _iset_single(
13361336
return
13371337

13381338
def column_setitem(
1339-
self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False
1339+
self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False
13401340
) -> None:
13411341
"""
13421342
Set values ("setitem") into a single column (not setting the full column).
@@ -1355,7 +1355,7 @@ def column_setitem(
13551355
# this manager is only created temporarily to mutate the values in place
13561356
# so don't track references, otherwise the `setitem` would perform CoW again
13571357
col_mgr = self.iget(loc, track_ref=False)
1358-
if inplace:
1358+
if inplace_only:
13591359
col_mgr.setitem_inplace(idx, value)
13601360
else:
13611361
new_mgr = col_mgr.setitem((idx,), value)

pandas/tests/computation/test_eval.py

-1
Original file line numberDiff line numberDiff line change
@@ -1860,7 +1860,6 @@ def test_eval_no_support_column_name(request, column):
18601860
tm.assert_frame_equal(result, expected)
18611861

18621862

1863-
@td.skip_array_manager_not_yet_implemented
18641863
def test_set_inplace(using_copy_on_write):
18651864
# https://github.com/pandas-dev/pandas/issues/47449
18661865
# Ensure we don't only update the DataFrame inplace, but also the actual

pandas/tests/copy_view/test_indexing.py

+12-20
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,8 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt
337337
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
338338
else:
339339
with pd.option_context("chained_assignment", "warn"):
340-
# The (i)loc[:, col] inplace deprecation gets triggered here, ignore those
341-
# warnings and only assert the SettingWithCopyWarning
342340
with tm.assert_produces_warning(
343-
SettingWithCopyWarning,
341+
None,
344342
raise_on_extra_warnings=not using_array_manager,
345343
):
346344
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
@@ -351,7 +349,7 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt
351349
index=range(1, 3),
352350
)
353351
tm.assert_frame_equal(subset, expected)
354-
if using_copy_on_write or using_array_manager:
352+
if using_copy_on_write:
355353
# original parent dataframe is not modified (CoW)
356354
tm.assert_frame_equal(df, df_orig)
357355
else:
@@ -373,18 +371,16 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager):
373371
subset.loc[:, "a"] = 0
374372
else:
375373
with pd.option_context("chained_assignment", "warn"):
376-
# The (i)loc[:, col] inplace deprecation gets triggered here, ignore those
377-
# warnings and only assert the SettingWithCopyWarning
378374
with tm.assert_produces_warning(
379-
SettingWithCopyWarning,
375+
None,
380376
raise_on_extra_warnings=not using_array_manager,
381377
):
382378
subset.loc[:, "a"] = 0
383379

384380
subset._mgr._verify_integrity()
385381
expected = DataFrame({"a": [0, 0]}, index=range(1, 3))
386382
tm.assert_frame_equal(subset, expected)
387-
if using_copy_on_write or using_array_manager:
383+
if using_copy_on_write:
388384
# original parent dataframe is not modified (CoW)
389385
tm.assert_frame_equal(df, df_orig)
390386
else:
@@ -439,24 +435,20 @@ def test_subset_set_with_column_indexer(
439435
subset.loc[:, indexer] = 0
440436
else:
441437
with pd.option_context("chained_assignment", "warn"):
442-
# The (i)loc[:, col] inplace deprecation gets triggered here, ignore those
443-
# warnings and only assert the SettingWithCopyWarning
444-
with tm.assert_produces_warning(
445-
SettingWithCopyWarning, raise_on_extra_warnings=False
446-
):
447-
subset.loc[:, indexer] = 0
438+
# As of 2.0, this setitem attempts (successfully) to set values
439+
# inplace, so the assignment is not chained.
440+
subset.loc[:, indexer] = 0
448441

449442
subset._mgr._verify_integrity()
450443
expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3))
451-
# TODO full row slice .loc[:, idx] update inplace instead of overwrite?
452-
expected["b"] = expected["b"].astype("int64")
453444
tm.assert_frame_equal(subset, expected)
454-
if using_copy_on_write or using_array_manager:
445+
if using_copy_on_write:
455446
tm.assert_frame_equal(df, df_orig)
456447
else:
457-
# In the mixed case with BlockManager, only one of the two columns is
458-
# mutated in the parent frame ..
459-
df_orig.loc[1:2, ["a"]] = 0
448+
# pre-2.0, in the mixed case with BlockManager, only column "a"
449+
# would be mutated in the parent frame. This changed with the
450+
# enforcement of GH#45333
451+
df_orig.loc[1:2, ["a", "b"]] = 0
460452
tm.assert_frame_equal(df, df_orig)
461453

462454

pandas/tests/extension/base/setitem.py

+2-25
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.core.dtypes.dtypes import (
5-
DatetimeTZDtype,
6-
IntervalDtype,
7-
PandasDtype,
8-
PeriodDtype,
9-
)
10-
114
import pandas as pd
125
import pandas._testing as tm
136
from pandas.tests.extension.base.base import BaseExtensionTests
@@ -382,11 +375,6 @@ def test_setitem_frame_2d_values(self, data):
382375
# GH#44514
383376
df = pd.DataFrame({"A": data})
384377

385-
# These dtypes have non-broken implementations of _can_hold_element
386-
has_can_hold_element = isinstance(
387-
data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
388-
)
389-
390378
# Avoiding using_array_manager fixture
391379
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
392380
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
@@ -396,24 +384,13 @@ def test_setitem_frame_2d_values(self, data):
396384

397385
orig = df.copy()
398386

399-
msg = "will attempt to set the values inplace instead"
400-
warn = None
401-
if has_can_hold_element and not isinstance(data.dtype, PandasDtype):
402-
# PandasDtype excluded because it isn't *really* supported.
403-
warn = FutureWarning
404-
405-
with tm.assert_produces_warning(warn, match=msg):
406-
df.iloc[:] = df
387+
df.iloc[:] = df
407388
self.assert_frame_equal(df, orig)
408389

409390
df.iloc[:-1] = df.iloc[:-1]
410391
self.assert_frame_equal(df, orig)
411392

412-
if isinstance(data.dtype, DatetimeTZDtype):
413-
# no warning bc df.values casts to object dtype
414-
warn = None
415-
with tm.assert_produces_warning(warn, match=msg):
416-
df.iloc[:] = df.values
393+
df.iloc[:] = df.values
417394
self.assert_frame_equal(df, orig)
418395
if not using_array_manager and not using_copy_on_write:
419396
# GH#33457 Check that this setting occurred in-place

0 commit comments

Comments
 (0)