Skip to content

API/BUG: always try to operate inplace when setting with loc/iloc[foo, bar] #39163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
2d2858f
API/BUG: always try to operate inplace when setting with loc/iloc[foo…
jbrockmendel Jan 14, 2021
6abba68
update categorical test
jbrockmendel Jan 15, 2021
97db70f
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Jan 16, 2021
8bc4546
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Jan 20, 2021
515726d
more whatsnew
jbrockmendel Jan 21, 2021
2cdd4cf
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Jan 21, 2021
d529b9f
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Jan 25, 2021
e648aa3
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Jan 25, 2021
f7ea31a
Merge branch 'master' into api-setitem-inplace
jbrockmendel Jan 28, 2021
df120b1
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Jan 28, 2021
3fe66b0
Merge branch 'master' into api-setitem-inplace
jbrockmendel Feb 2, 2021
1f111de
Merge branch 'master' of https://github.com/pandas-dev/pandas into ap…
jbrockmendel Feb 10, 2021
09c8ffd
re-write whatnsew note
jbrockmendel Feb 10, 2021
4a1ce99
Merge branch 'master' into api-setitem-inplace
jbrockmendel Feb 23, 2021
f27025f
Merge branch 'master' into api-setitem-inplace
jbrockmendel Feb 25, 2021
cffaedf
Merge branch 'master' into api-setitem-inplace
jbrockmendel Feb 26, 2021
71cb80d
Merge branch 'master' into api-setitem-inplace
jbrockmendel Mar 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,46 @@ Preserve dtypes in :meth:`~pandas.DataFrame.combine_first`
combined.dtypes


Try operating inplace when setting values with ``loc`` and ``iloc``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When setting an entire column using ``loc`` or ``iloc``, pandas will try to
insert the values into the existing data rather than create an entirely new array.

.. ipython:: python

df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
values = df.values
new = np.array([5, 6, 7], dtype="int64")
df.loc[[0, 1, 2], "A"] = new

In both the new and old behavior, the data in ``values`` is overwritten, but in
the old behavior the dtype of ``df["A"]`` changed to ``int64``.

*pandas 1.2.x*

.. code-block:: ipython

In [1]: df.dtypes
Out[1]:
A int64
dtype: object
In [2]: np.shares_memory(df["A"].values, new)
Out[2]: False
In [3]: np.shares_memory(df["A"].values, values)
Out[3]: False

In pandas 1.3.0, ``df`` continues to share data with ``values``:

*pandas 1.3.0*

.. ipython:: python

df.dtypes
np.shares_memory(df["A"], new)
np.shares_memory(df["A"], values)


.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:

Consistent Casting With Setting Into Boolean Series
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1866,7 +1866,6 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
ser = value
elif is_array_like(value) and is_exact_shape_match(ser, value):
ser = value

else:
# set the item, possibly having a dtype change
ser = ser.copy()
Expand Down
21 changes: 11 additions & 10 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
Categorical,
DatetimeArray,
ExtensionArray,
FloatingArray,
IntegerArray,
PandasArray,
)
from pandas.core.base import PandasObject
Expand Down Expand Up @@ -994,6 +996,7 @@ def setitem(self, indexer, value):
# length checking
check_setitem_lengths(indexer, value, values)
exact_match = is_exact_shape_match(values, arr_value)

if is_empty_indexer(indexer, arr_value):
# GH#8669 empty indexers
pass
Expand All @@ -1007,27 +1010,21 @@ def setitem(self, indexer, value):
# GH25495 - If the current dtype is not categorical,
# we need to create a new categorical block
values[indexer] = value
if values.ndim == 2:
# TODO(EA2D): special case not needed with 2D EAs
if values.shape[-1] != 1:
# shouldn't get here (at least until 2D EAs)
raise NotImplementedError
values = values[:, 0]
return self.make_block(Categorical(values, dtype=arr_value.dtype))

elif exact_match and is_ea_value:
# GH#32395 if we're going to replace the values entirely, just
# substitute in the new array
return self.make_block(arr_value)
if not self.is_object and isinstance(value, (IntegerArray, FloatingArray)):
values[indexer] = value.to_numpy(value.dtype.numpy_dtype)
else:
values[indexer] = np.asarray(value)

# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif exact_match:
# We are setting _all_ of the array's values, so can cast to new dtype
values[indexer] = value

values = values.astype(arr_value.dtype, copy=False)

elif is_ea_value:
# GH#38952
if values.ndim == 1:
Expand Down Expand Up @@ -1892,6 +1889,10 @@ class NumericBlock(Block):
is_numeric = True

def _can_hold_element(self, element: Any) -> bool:
element = extract_array(element, extract_numpy=True)
if isinstance(element, (IntegerArray, FloatingArray)):
if element._mask.any():
return False
return can_hold_element(self.dtype, element)

@property
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,13 +339,20 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):

key = full_indexer(df)
result.loc[key, "data"] = df["data"]

self.assert_frame_equal(result, expected)

def test_setitem_series(self, data, full_indexer):
# https://github.com/pandas-dev/pandas/issues/32395
ser = expected = pd.Series(data, name="data")
ser = pd.Series(data, name="data")
result = pd.Series(index=ser.index, dtype=object, name="data")

# because result has object dtype, the attempt to do setting inplace
# is successful, and object dtype is retained
key = full_indexer(ser)
result.loc[key] = ser

expected = pd.Series(
data.astype(object), index=ser.index, name="data", dtype=object
)
self.assert_series_equal(result, expected)
17 changes: 17 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,23 @@ def test_setitem_slice(self, data, box_in_series):
def test_setitem_loc_iloc_slice(self, data):
super().test_setitem_loc_iloc_slice(data)

def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
# https://github.com/pandas-dev/pandas/issues/32395
df = expected = pd.DataFrame({"data": pd.Series(data)})
result = pd.DataFrame(index=df.index)

# because result has object dtype, the attempt to do setting inplace
# is successful, and object dtype is retained
key = full_indexer(df)
result.loc[key, "data"] = df["data"]

# base class method has expected = df; PandasArray behaves oddly because
# we patch _typ for these tests.
if data.dtype.numpy_dtype != object:
if not isinstance(key, slice) or key != slice(None):
expected = pd.DataFrame({"data": data.to_numpy()})
self.assert_frame_equal(result, expected)


@skip_nested
class TestParsing(BaseNumPyTests, base.BaseParsingTests):
Expand Down
26 changes: 12 additions & 14 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,33 +67,31 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key):
frame = DataFrame({0: range(3)}, dtype=object)

cat = Categorical(["alpha", "beta", "gamma"])
expected = DataFrame({0: cat})
# NB: pending GH#38896, the expected likely should become
# expected= DataFrame({"A": cat.astype(object)})
# and should remain a view on the original values

assert frame._mgr.blocks[0]._can_hold_element(cat)

df = frame.copy()
orig_vals = df.values
indexer(df)[key, 0] = cat

overwrite = not isinstance(key, slice)
overwrite = isinstance(key, slice) and key == slice(None)

tm.assert_frame_equal(df, expected)

# TODO: this inconsistency is likely undesired GH#39986
if overwrite:
# check that we overwrote underlying
tm.assert_numpy_array_equal(orig_vals, df.values)
# TODO: GH#39986 this probably shouldn't behave differently
expected = DataFrame({0: cat})
assert not np.shares_memory(df.values, orig_vals)
else:
expected = DataFrame({0: cat}).astype(object)
assert np.shares_memory(df.values, orig_vals)

# but we don't have a view on orig_vals
orig_vals[0, 0] = 19
assert df.iloc[0, 0] != 19
tm.assert_frame_equal(df, expected)

# check we don't have a view on cat (may be undesired GH#39986)
df.iloc[0, 0] = "gamma"
assert cat[0] != "gamma"
if overwrite:
assert cat[0] != "gamma"
else:
assert cat[0] != "gamma"

@pytest.mark.parametrize("box", [pd_array, Series])
def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,7 @@ def test_float_index_non_scalar_assignment(self):
expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index)
tm.assert_frame_equal(expected, df)

def test_loc_setitem_fullindex_views(self):
df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
df2 = df.copy()
df.loc[df.index] = df.loc[df.index]
Expand Down
73 changes: 45 additions & 28 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,32 +588,19 @@ def test_loc_modify_datetime(self):

tm.assert_frame_equal(df, expected)

def test_loc_setitem_frame(self):
df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD"))

df.loc["a", "A"] = 1
result = df.loc["a", "A"]
assert result == 1

result = df.iloc[0, 0]
assert result == 1

df.loc[:, "B":"D"] = 0
expected = df.loc[:, "B":"D"]
result = df.iloc[:, 1:]
tm.assert_frame_equal(result, expected)

# GH 6254
# setting issue
df = DataFrame(index=[3, 5, 4], columns=["A"])
def test_loc_setitem_frame_with_reindex(self):
# GH#6254 setting issue
df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
expected = DataFrame({"A": Series([1, 2, 3], index=[4, 3, 5])}).reindex(
index=[3, 5, 4]
)

# setting integer values into a float dataframe with loc is inplace,
# so we retain float dtype
ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
expected = DataFrame({"A": ser})
tm.assert_frame_equal(df, expected)

# GH 6252
# setting with an empty frame
def test_loc_setitem_empty_frame(self):
# GH#6252 setting with an empty frame
keys1 = ["@" + str(i) for i in range(5)]
val1 = np.arange(5, dtype="int64")

Expand All @@ -628,18 +615,39 @@ def test_loc_setitem_frame(self):
df["B"] = np.nan
df.loc[keys2, "B"] = val2

expected = DataFrame(
{"A": Series(val1, index=keys1), "B": Series(val2, index=keys2)}
).reindex(index=index)
# Because df["A"] was initialized as float64, setting values into it
# is inplace, so that dtype is retained
sera = Series(val1, index=keys1, dtype=np.float64)
serb = Series(val2, index=keys2)
expected = DataFrame({"A": sera, "B": serb}).reindex(index=index)
tm.assert_frame_equal(df, expected)

def test_loc_setitem_frame(self):
df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD"))

result = df.iloc[0, 0]

df.loc["a", "A"] = 1
result = df.loc["a", "A"]
assert result == 1

result = df.iloc[0, 0]
assert result == 1

df.loc[:, "B":"D"] = 0
expected = df.loc[:, "B":"D"]
result = df.iloc[:, 1:]
tm.assert_frame_equal(result, expected)

def test_loc_setitem_frame_nan_int_coercion_invalid(self):
# GH 8669
# invalid coercion of nan -> int
df = DataFrame({"A": [1, 2, 3], "B": np.nan})
df.loc[df.B > df.A, "B"] = df.A
expected = DataFrame({"A": [1, 2, 3], "B": np.nan})
tm.assert_frame_equal(df, expected)

def test_loc_setitem_frame_mixed_labels(self):
# GH 6546
# setting with mixed labels
df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})
Expand Down Expand Up @@ -1063,8 +1071,15 @@ def test_loc_setitem_str_to_small_float_conversion_type(self):
expected = DataFrame(col_data, columns=["A"], dtype=object)
tm.assert_frame_equal(result, expected)

# change the dtype of the elements from object to float one by one
# assigning with loc/iloc attempts to set the values inplace, which
# in this case is successful
result.loc[result.index, "A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
tm.assert_frame_equal(result, expected)

# assigning the entire column using __setitem__ swaps in the new array
# GH#???
result["A"] = [float(x) for x in col_data]
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1219,7 +1234,9 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
tz = tz_naive_fixture
idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz)
expected = DataFrame(1.2, index=idx, columns=["var"])
result = DataFrame(index=idx, columns=["var"])
# if result started off with object dtype, then the .loc.__setitem__
# below would retain object dtype
result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
result.loc[:, idxer] = expected
tm.assert_frame_equal(result, expected)

Expand Down