From 8667c3abd1a23d35b30f3444abb52471e45d28b4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 26 Mar 2020 11:07:20 -0700 Subject: [PATCH 1/3] BUG: DataFrame.at with non-unique axes --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/indexing.py | 17 +++++++++++++++++ pandas/tests/indexing/test_scalar.py | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0ebe57bfbb3a1..297b53807d2bb 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -332,6 +332,7 @@ Indexing - Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`) - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`) - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`) +- Bug in :meth:`DataFrame.at` when either columns or index is non-unique (:issue:`33041`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`) - Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`) - Bug in :meth:`DataFrame.loc:` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8038bba8b6448..955c9e42c84ba 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2063,7 +2063,16 @@ def _convert_key(self, key, is_setter: bool = False): return key + @property + def _axes_are_unique(self) -> bool: + # Only relevant for self.ndim == 2 + return self.obj.index.is_unique and self.obj.columns.is_unique + def __getitem__(self, key): + if self.ndim == 2 and not self._axes_are_unique: + # GH#33041 fall back to .loc + return self.obj.loc[key] + if self.ndim != 1 or not is_scalar(key): # FIXME: is_scalar check is a kludge return super().__getitem__(key) @@ -2073,6 +2082,14 @@ def __getitem__(self, key): loc = obj.index.get_loc(key) return obj.index._get_values_for_loc(obj, loc, key) + def __setitem__(self, key, value): + if self.ndim == 2 and not self._axes_are_unique: + # GH#33041 fall back to .loc + self.obj.loc[key] = value + return + + return super().__setitem__(key, value) + @doc(IndexingMixin.iat) class _iAtIndexer(_ScalarAccessIndexer): diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 25939e63c256b..df3be1a86f62a 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -128,6 +128,24 @@ def test_imethods_with_dups(self): result = df.iat[2, 0] assert result == 2 + def test_frame_at_with_duplicate_axes(self): + # GH#33041 + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + result = df.at[0, "A"] + expected = df.iloc[0] + + tm.assert_series_equal(result, expected) + + result = df.T.at["A", 0] + tm.assert_series_equal(result, expected) + + # setter + df.at[1, "A"] = 2 + expected = Series([2.0, 2.0], index=["A", "A"], name=1) + tm.assert_series_equal(df.iloc[1], expected) + def test_series_at_raises_type_error(self): # at should not fallback # GH 7814 From 42dfe2902a38cfb7d6fcfad56e30109e03a269d2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 26 Mar 2020 16:31:29 -0700 Subject: [PATCH 2/3] assertion --- pandas/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 955c9e42c84ba..8eee3b9d00204 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2066,6 +2066,7 @@ def _convert_key(self, key, is_setter: bool = False): @property def _axes_are_unique(self) -> bool: # Only relevant for self.ndim == 2 + assert self.ndim == 2 return self.obj.index.is_unique and self.obj.columns.is_unique def __getitem__(self, key): From 02f6548e4f0f9d0de89bf6c80947fe56fe7fbd6b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Apr 2020 15:52:23 -0700 Subject: [PATCH 3/3] Prevent letting non-scalar lookups slip in --- pandas/core/indexing.py | 16 ++++++++-------- pandas/tests/indexing/test_scalar.py | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 81476b767d13a..dd072cf00ed20 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2045,6 +2045,7 @@ def __setitem__(self, key, value): key = _tuplify(self.ndim, key) if len(key) != self.ndim: raise ValueError("Not enough indexers for scalar access (setting)!") + key = list(self._convert_key(key, is_setter=True)) self.obj._set_value(*key, value=value, takeable=self._takeable) @@ -2071,22 +2072,21 @@ def _axes_are_unique(self) -> bool: return self.obj.index.is_unique and self.obj.columns.is_unique def __getitem__(self, key): + if self.ndim == 2 and not self._axes_are_unique: # GH#33041 fall back to .loc + if not isinstance(key, tuple) or not all(is_scalar(x) for x in key): + raise ValueError("Invalid call for scalar access (getting)!") return self.obj.loc[key] - if self.ndim != 1 or not is_scalar(key): - # FIXME: is_scalar check is a kludge - return super().__getitem__(key) - - # Like Index.get_value, but we do not allow positional fallback - obj = self.obj - loc = obj.index.get_loc(key) - return obj.index._get_values_for_loc(obj, loc, key) + return super().__getitem__(key) def __setitem__(self, key, value): if self.ndim == 2 and not self._axes_are_unique: # GH#33041 fall back to .loc + if not isinstance(key, tuple) or not all(is_scalar(x) for x in key): + raise ValueError("Invalid call for scalar access (setting)!") + self.obj.loc[key] = value return diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 58046e88228bc..216d554e22b49 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -146,6 +146,28 @@ def test_frame_at_with_duplicate_axes(self): expected = Series([2.0, 2.0], index=["A", "A"], name=1) tm.assert_series_equal(df.iloc[1], expected) + def test_frame_at_with_duplicate_axes_requires_scalar_lookup(self): + # GH#33041 check that falling back to loc doesn't allow non-scalar + # args to slip in + + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + msg = "Invalid call for scalar access" + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] + + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] = 1 + def test_series_at_raises_type_error(self): # at should not fallback # GH 7814