From c5b7ea7e063220c29e68e0a1c1b7f3d188f41614 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jan 2022 15:47:35 -0800 Subject: [PATCH 1/3] Backport PR #45706: BUG: Frame.iat item_cache invalidation bug --- doc/source/whatsnew/v1.4.1.rst | 3 ++- pandas/core/frame.py | 24 ++++++++---------------- pandas/tests/indexing/test_iat.py | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v1.4.1.rst b/doc/source/whatsnew/v1.4.1.rst index 035b2ce8eb418..9c813a4803004 100644 --- a/doc/source/whatsnew/v1.4.1.rst +++ b/doc/source/whatsnew/v1.4.1.rst @@ -17,9 +17,10 @@ Fixed regressions - Regression in :meth:`Series.mask` with ``inplace=True`` and ``PeriodDtype`` and an incompatible ``other`` coercing to a common dtype instead of raising (:issue:`45546`) - Regression in :func:`.assert_frame_equal` not respecting ``check_flags=False`` (:issue:`45554`) - Regression in :meth:`Series.fillna` with ``downcast=False`` incorrectly downcasting ``object`` dtype (:issue:`45603`) +- Regression in :meth:`DataFrame.iat` where setting a value did not propagate correctly to subsequent lookups (:issue:`45684`) - Regression in :meth:`DataFrame.loc.__setitem__` losing :class:`Index` name if :class:`DataFrame` was empty before (:issue:`45621`) - Regression in :func:`join` with overlapping :class:`IntervalIndex` raising an ``InvalidIndexError`` (:issue:`45661`) -- + .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 512d85de333ad..a9befb3012343 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -93,7 +93,6 @@ ) from pandas.core.dtypes.cast import ( - can_hold_element, construct_1d_arraylike_from_scalar, construct_2d_arraylike_from_scalar, find_common_type, @@ -3864,23 +3863,16 @@ def _set_value( try: if takeable: series = self._ixs(col, axis=1) - series._set_value(index, value, takeable=True) - return - - series = self._get_item_cache(col) - loc = self.index.get_loc(index) - dtype = series.dtype - if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]: - # otherwise we have EA values, and this check will be done - # via setitem_inplace - if not can_hold_element(series._values, value): - # We'll go through loc and end up casting. - raise TypeError + loc = index + else: + series = self._get_item_cache(col) + loc = self.index.get_loc(index) + # setitem_inplace will do validation that may raise TypeError + # or ValueError series._mgr.setitem_inplace(loc, value) - # Note: trying to use series._set_value breaks tests in - # tests.frame.indexing.test_indexing and tests.indexing.test_partial - except (KeyError, TypeError): + + except (KeyError, TypeError, ValueError): # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index f1fe464ca0854..44bd51ee1b7d1 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -29,3 +29,20 @@ def test_iat_getitem_series_with_period_index(): expected = ser[index[0]] result = ser.iat[0] assert expected == result + + +def test_iat_setitem_item_cache_cleared(indexer_ial): + # GH#45684 + data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)} + df = DataFrame(data).copy() + ser = df["y"] + + # previously this iat setting would split the 
block and fail to clear + # the item_cache. + indexer_ial(df)[7, 0] = 9999 + + indexer_ial(df)[7, 1] = 1234 + + assert df.iat[7, 1] == 1234 + assert ser.iloc[-1] == 1234 + assert df.iloc[-1, -1] == 1234 From 93fa960bc08008aa1b93d7be5e777d7dcee42b90 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Jan 2022 14:29:42 -0800 Subject: [PATCH 2/3] port indexer_ial --- pandas/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 04f460902c11a..3803a10e33fae 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1757,6 +1757,14 @@ def indexer_al(request): return request.param +@pytest.fixture(params=[tm.iat, tm.iloc]) +def indexer_ial(request): + """ + Parametrize over iat.__setitem__, iloc.__setitem__ + """ + return request.param + + @pytest.fixture def using_array_manager(request): """ From 0464ea8f0f5ec40cfd02394c1a848d1b9e278417 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Feb 2022 09:40:04 -0800 Subject: [PATCH 3/3] validation --- pandas/core/internals/base.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 74d8b20332fff..75ae0f62681aa 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -9,6 +9,8 @@ final, ) +import numpy as np + from pandas._typing import ( ArrayLike, DtypeObj, @@ -16,7 +18,10 @@ ) from pandas.errors import AbstractMethodError -from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.cast import ( + find_common_type, + np_can_hold_element, +) from pandas.core.base import PandasObject from pandas.core.indexes.api import ( @@ -174,6 +179,14 @@ def setitem_inplace(self, indexer, value) -> None: in place, not returning a new Manager (and Block), and thus never changing the dtype. """ + arr = self.array + + # EAs will do this validation in their own __setitem__ methods. 
+ if isinstance(arr, np.ndarray): + # Note: checking for ndarray instead of np.dtype means we exclude + # dt64/td64, which do their own validation. + value = np_can_hold_element(arr.dtype, value) + self.array[indexer] = value def grouped_reduce(self, func, ignore_failures: bool = False):