From 178f79eaae071f09c8cf6902869158a857226895 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Jun 2021 22:08:32 -0700 Subject: [PATCH 1/3] BUG: Series.loc[-1] with UInt64Index --- pandas/_libs/index.pyx | 3 ++- pandas/core/indexes/base.py | 5 +++++ pandas/core/indexes/datetimelike.py | 3 +-- pandas/core/indexes/numeric.py | 16 ---------------- pandas/tests/indexing/test_loc.py | 28 +++++++++++++++++++++------- 5 files changed, 29 insertions(+), 26 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f7cec262ca302..3351bb7cac7d6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -106,7 +106,8 @@ cdef class IndexEngine: try: return self.mapping.get_item(val) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): + # GH#41775 OverflowError e.g. if we are uint64 and val is -1 raise KeyError(val) cdef inline _get_loc_duplicates(self, object val): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2a50ebd959ace..c0138cca3876e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5413,6 +5413,11 @@ def _find_common_type_compat(self, target) -> DtypeObj: target_dtype, _ = infer_dtype_from(target, pandas_dtype=True) dtype = find_common_type([self.dtype, target_dtype]) + + if {self.dtype.kind, target_dtype.kind} == {"i", "u"}: + # See comment in Index.union about losslessness + return np.dtype("object") + if dtype.kind in ["i", "u"]: # TODO: what about reversed with self being categorical? 
if ( diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5f24eb0cfaad6..3dc46f04d1d45 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -49,7 +49,6 @@ TimedeltaArray, ) from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin -import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, @@ -599,7 +598,7 @@ def _convert_arr_indexer(self, keyarr): try: return self._data._validate_listlike(keyarr, allow_object=True) except (ValueError, TypeError): - return com.asarray_tuplesafe(keyarr) + return super()._convert_arr_indexer(keyarr) class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index de7c522b4fbec..e6526bd0eaf2f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -37,7 +37,6 @@ ) from pandas.core.dtypes.generic import ABCSeries -import pandas.core.common as com from pandas.core.indexes.base import ( Index, maybe_extract_name, @@ -250,21 +249,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): # we will try to coerce to integers return self._maybe_cast_indexer(label) - @doc(Index._convert_arr_indexer) - def _convert_arr_indexer(self, keyarr) -> np.ndarray: - if not is_unsigned_integer_dtype(self.dtype): - return super()._convert_arr_indexer(keyarr) - - # Cast the indexer to uint64 if possible so that the values returned - # from indexing are also uint64. 
- dtype = None - if is_integer_dtype(keyarr) or ( - lib.infer_dtype(keyarr, skipna=False) == "integer" - ): - dtype = np.dtype(np.uint64) - - return com.asarray_tuplesafe(keyarr, dtype=dtype) - # ---------------------------------------------------------------- @doc(Index._shallow_copy) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c1a096ed06efc..4568ecb8939fa 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1005,18 +1005,32 @@ def test_loc_copy_vs_view(self): def test_loc_uint64(self): # GH20722 # Test whether loc accept uint64 max value as index. - s = Series([1, 2], index=[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]) + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) - result = s.loc[np.iinfo("uint64").max - 1] - expected = s.iloc[0] + result = ser.loc[umax - 1] + expected = ser.iloc[0] assert result == expected - result = s.loc[[np.iinfo("uint64").max - 1]] - expected = s.iloc[[0]] + result = ser.loc[[umax - 1]] + expected = ser.iloc[[0]] tm.assert_series_equal(result, expected) - result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] - tm.assert_series_equal(result, s) + result = ser.loc[[umax - 1, umax]] + tm.assert_series_equal(result, ser) + + def test_loc_uint64_disallow_negative(self): + # GH#41775 + umax = np.iinfo("uint64").max + ser = Series([1, 2], index=[umax - 1, umax]) + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[-1] + + with pytest.raises(KeyError, match="-1"): + # don't wrap around + ser.loc[[-1]] def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe From 591059e00b4cc8de0f54c3a14227fb49ddcd6952 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Jun 2021 16:28:40 -0700 Subject: [PATCH 2/3] Post-merge de-duplication --- pandas/core/indexes/base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/indexes/base.py 
b/pandas/core/indexes/base.py index f82571db3913a..db718916d7fd7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5411,10 +5411,6 @@ def _find_common_type_compat(self, target) -> DtypeObj: dtype = find_common_type([self.dtype, target_dtype]) - if {self.dtype.kind, target_dtype.kind} == {"i", "u"}: - # See comment in Index.union about losslessness - return np.dtype("object") - if dtype.kind in ["i", "u"]: # TODO: what about reversed with self being categorical? if ( From 2c2dfd594bdf2ff3ba845f0e2a2cc17a2c0ec502 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Jun 2021 16:30:19 -0700 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b36499c340fd9..cac7b9d8677b0 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -945,6 +945,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returning :class:`MultiIndex` in wrong order if indexer has duplicates (:issue:`40978`) - Bug in :meth:`DataFrame.__setitem__` raising ``TypeError`` when using a str subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`) - Bug in :meth:`PeriodIndex.get_loc` failing to raise ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`) +- Bug in ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`) Missing ^^^^^^^