Skip to content

Commit 5081748

Browse files
authored
BUG: DTI/TDI/PI get_indexer_non_unique with incompatible dtype (#32650)
1 parent 96d22d4 commit 5081748

File tree

8 files changed

+131
-15
lines changed

8 files changed

+131
-15
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ Indexing
301301
- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
302302
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
303303
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
304+
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
304305

305306
Missing
306307
^^^^^^^

pandas/core/indexes/base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
ensure_platform_int,
3030
is_bool,
3131
is_bool_dtype,
32-
is_categorical,
3332
is_categorical_dtype,
3433
is_datetime64_any_dtype,
3534
is_dtype_equal,
@@ -532,6 +531,9 @@ def _shallow_copy_with_infer(self, values, **kwargs):
532531
return self._constructor(values, **attributes)
533532
except (TypeError, ValueError):
534533
pass
534+
535+
# Remove tz so Index will try non-DatetimeIndex inference
536+
attributes.pop("tz", None)
535537
return Index(values, **attributes)
536538

537539
def _update_inplace(self, result, **kwargs):
@@ -4657,10 +4659,8 @@ def get_indexer_non_unique(self, target):
46574659
if pself is not self or ptarget is not target:
46584660
return pself.get_indexer_non_unique(ptarget)
46594661

4660-
if is_categorical(target):
4662+
if is_categorical_dtype(target.dtype):
46614663
tgt_values = np.asarray(target)
4662-
elif self.is_all_dates and target.is_all_dates: # GH 30399
4663-
tgt_values = target.asi8
46644664
else:
46654665
tgt_values = target._get_engine_target()
46664666

pandas/core/indexes/datetimelike.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88

99
from pandas._libs import NaT, iNaT, join as libjoin, lib
1010
from pandas._libs.tslibs import timezones
11-
from pandas._typing import Label
11+
from pandas._typing import DtypeObj, Label
1212
from pandas.compat.numpy import function as nv
1313
from pandas.errors import AbstractMethodError
1414
from pandas.util._decorators import Appender, cache_readonly, doc
1515

1616
from pandas.core.dtypes.common import (
1717
ensure_int64,
18+
ensure_platform_int,
1819
is_bool_dtype,
1920
is_categorical_dtype,
2021
is_dtype_equal,
@@ -32,7 +33,7 @@
3233
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3334
from pandas.core.base import IndexOpsMixin
3435
import pandas.core.indexes.base as ibase
35-
from pandas.core.indexes.base import Index, _index_shared_docs
36+
from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
3637
from pandas.core.indexes.extension import (
3738
ExtensionIndex,
3839
inherit_names,
@@ -101,6 +102,12 @@ class DatetimeIndexOpsMixin(ExtensionIndex):
101102
def is_all_dates(self) -> bool:
102103
return True
103104

105+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
106+
"""
107+
Can we compare values of the given dtype to our own?
108+
"""
109+
raise AbstractMethodError(self)
110+
104111
# ------------------------------------------------------------------------
105112
# Abstract data attributes
106113

@@ -426,6 +433,21 @@ def _partial_date_slice(
426433
# try to find the dates
427434
return (lhs_mask & rhs_mask).nonzero()[0]
428435

436+
@Appender(Index.get_indexer_non_unique.__doc__)
437+
def get_indexer_non_unique(self, target):
438+
target = ensure_index(target)
439+
pself, ptarget = self._maybe_promote(target)
440+
if pself is not self or ptarget is not target:
441+
return pself.get_indexer_non_unique(ptarget)
442+
443+
if not self._is_comparable_dtype(target.dtype):
444+
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
445+
return no_matches, no_matches
446+
447+
tgt_values = target.asi8
448+
indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
449+
return ensure_platform_int(indexer), missing
450+
429451
# --------------------------------------------------------------------
430452

431453
__add__ = make_wrapped_arith_op("__add__")

pandas/core/indexes/datetimes.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,18 @@
77

88
from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts
99
from pandas._libs.tslibs import fields, parsing, timezones
10-
from pandas._typing import Label
10+
from pandas._typing import DtypeObj, Label
1111
from pandas.util._decorators import cache_readonly
1212

13-
from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar
13+
from pandas.core.dtypes.common import (
14+
_NS_DTYPE,
15+
is_datetime64_any_dtype,
16+
is_datetime64_dtype,
17+
is_datetime64tz_dtype,
18+
is_float,
19+
is_integer,
20+
is_scalar,
21+
)
1422
from pandas.core.dtypes.missing import is_valid_nat_for_dtype
1523

1624
from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype
@@ -298,6 +306,18 @@ def _convert_for_op(self, value):
298306
return Timestamp(value).asm8
299307
raise ValueError("Passed item and index have different timezone")
300308

309+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
310+
"""
311+
Can we compare values of the given dtype to our own?
312+
"""
313+
if not is_datetime64_any_dtype(dtype):
314+
return False
315+
if self.tz is not None:
316+
# If we have tz, we can only compare to tz-aware
317+
return is_datetime64tz_dtype(dtype)
318+
# if we don't have tz, we can only compare to tz-naive
319+
return is_datetime64_dtype(dtype)
320+
301321
# --------------------------------------------------------------------
302322
# Rendering Methods
303323

pandas/core/indexes/period.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas._libs.tslibs import frequencies as libfrequencies, resolution
1010
from pandas._libs.tslibs.parsing import parse_time_string
1111
from pandas._libs.tslibs.period import Period
12-
from pandas._typing import Label
12+
from pandas._typing import DtypeObj, Label
1313
from pandas.util._decorators import Appender, cache_readonly
1414

1515
from pandas.core.dtypes.common import (
@@ -23,6 +23,7 @@
2323
is_scalar,
2424
pandas_dtype,
2525
)
26+
from pandas.core.dtypes.dtypes import PeriodDtype
2627

2728
from pandas.core.arrays.period import (
2829
PeriodArray,
@@ -298,6 +299,14 @@ def _maybe_convert_timedelta(self, other):
298299
# raise when input doesn't have freq
299300
raise raise_on_incompatible(self, None)
300301

302+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
303+
"""
304+
Can we compare values of the given dtype to our own?
305+
"""
306+
if not isinstance(dtype, PeriodDtype):
307+
return False
308+
return dtype.freq == self.freq
309+
301310
# ------------------------------------------------------------------------
302311
# Rendering Methods
303312

@@ -454,12 +463,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
454463
def get_indexer_non_unique(self, target):
455464
target = ensure_index(target)
456465

457-
if isinstance(target, PeriodIndex):
458-
if target.freq != self.freq:
459-
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
460-
return no_matches, no_matches
466+
if not self._is_comparable_dtype(target.dtype):
467+
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
468+
return no_matches, no_matches
461469

462-
target = target.asi8
470+
target = target.asi8
463471

464472
indexer, missing = self._int64index.get_indexer_non_unique(target)
465473
return ensure_platform_int(indexer), missing

pandas/core/indexes/timedeltas.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
""" implement the TimedeltaIndex """
22

33
from pandas._libs import NaT, Timedelta, index as libindex
4-
from pandas._typing import Label
4+
from pandas._typing import DtypeObj, Label
55
from pandas.util._decorators import Appender
66

77
from pandas.core.dtypes.common import (
@@ -213,6 +213,12 @@ def _maybe_promote(self, other):
213213
other = TimedeltaIndex(other)
214214
return self, other
215215

216+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
217+
"""
218+
Can we compare values of the given dtype to our own?
219+
"""
220+
return is_timedelta64_dtype(dtype)
221+
216222
def get_loc(self, key, method=None, tolerance=None):
217223
"""
218224
Get integer location for requested label

pandas/tests/indexes/test_base.py

+37
Original file line numberDiff line numberDiff line change
@@ -2616,3 +2616,40 @@ def test_convert_almost_null_slice(indices):
26162616
msg = "'>=' not supported between instances of 'str' and 'int'"
26172617
with pytest.raises(TypeError, match=msg):
26182618
idx._convert_slice_indexer(key, "loc")
2619+
2620+
2621+
dtlike_dtypes = [
2622+
np.dtype("timedelta64[ns]"),
2623+
np.dtype("datetime64[ns]"),
2624+
pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
2625+
pd.PeriodDtype("ns"),
2626+
]
2627+
2628+
2629+
@pytest.mark.parametrize("ldtype", dtlike_dtypes)
2630+
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
2631+
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
2632+
2633+
vals = np.tile(3600 * 10 ** 9 * np.arange(3), 2)
2634+
2635+
def construct(dtype):
2636+
if dtype is dtlike_dtypes[-1]:
2637+
# PeriodArray will try to cast ints to strings
2638+
return pd.DatetimeIndex(vals).astype(dtype)
2639+
return pd.Index(vals, dtype=dtype)
2640+
2641+
left = construct(ldtype)
2642+
right = construct(rdtype)
2643+
2644+
result = left.get_indexer_non_unique(right)
2645+
2646+
if ldtype is rdtype:
2647+
ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
2648+
ex2 = np.array([], dtype=np.intp)
2649+
tm.assert_numpy_array_equal(result[0], ex1)
2650+
tm.assert_numpy_array_equal(result[1], ex2.astype(np.int64))
2651+
2652+
else:
2653+
no_matches = np.array([-1] * 6, dtype=np.intp)
2654+
tm.assert_numpy_array_equal(result[0], no_matches)
2655+
tm.assert_numpy_array_equal(result[1], no_matches)

pandas/tests/indexing/test_loc.py

+22
Original file line numberDiff line numberDiff line change
@@ -1073,3 +1073,25 @@ def test_loc_slice_disallows_positional():
10731073
with tm.assert_produces_warning(FutureWarning):
10741074
# GH#31840 deprecated incorrect behavior
10751075
df.loc[1:3, 1] = 2
1076+
1077+
1078+
def test_loc_datetimelike_mismatched_dtypes():
1079+
# GH#32650 don't mix and match datetime/timedelta/period dtypes
1080+
1081+
df = pd.DataFrame(
1082+
np.random.randn(5, 3),
1083+
columns=["a", "b", "c"],
1084+
index=pd.date_range("2012", freq="H", periods=5),
1085+
)
1086+
# create dataframe with non-unique DatetimeIndex
1087+
df = df.iloc[[0, 2, 2, 3]].copy()
1088+
1089+
dti = df.index
1090+
tdi = pd.TimedeltaIndex(dti.asi8) # matching i8 values
1091+
1092+
msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
1093+
with pytest.raises(KeyError, match=msg):
1094+
df.loc[tdi]
1095+
1096+
with pytest.raises(KeyError, match=msg):
1097+
df["a"].loc[tdi]

0 commit comments

Comments
 (0)