diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 09fe885e47754..06b57662149ad 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4937,6 +4937,43 @@ def get_indexer_for(self, target, **kwargs):
         indexer, _ = self.get_indexer_non_unique(target)
         return indexer
 
+    def _get_indexer_non_comparable(self, target: "Index", method, unique: bool = True):
+        """
+        Called from get_indexer or get_indexer_non_unique when the target
+        is of a non-comparable dtype.
+
+        For get_indexer lookups with method=None, get_indexer is an _equality_
+        check, so non-comparable dtypes mean we will always have no matches.
+
+        For get_indexer lookups with a method, get_indexer is an _inequality_
+        check, so non-comparable dtypes mean we will always raise TypeError.
+
+        Parameters
+        ----------
+        target : Index
+        method : str or None
+        unique : bool, default True
+            * True if called from get_indexer.
+            * False if called from get_indexer_non_unique.
+
+        Raises
+        ------
+        TypeError
+            If doing an inequality check, i.e. method is not None.
+        """
+        if method is not None:
+            other = _unpack_nested_dtype(target)
+            raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
+
+        no_matches = -1 * np.ones(target.shape, dtype=np.intp)
+        if unique:
+            # get_indexer: a single indexer of all -1 (no matches)
+            return no_matches
+        else:
+            # get_indexer_non_unique: (indexer, missing); every position is missing
+            missing = np.arange(len(target), dtype=np.intp)
+            return no_matches, missing
+
     @property
     def _index_as_unique(self):
         """
@@ -4972,6 +5009,14 @@ def _maybe_promote(self, other: "Index"):
 
         return self, other
 
+    def _should_compare(self, other: "Index") -> bool:
+        """
+        Check if `self == other` can ever have non-False entries.
+        """
+        other = _unpack_nested_dtype(other)
+        dtype = other.dtype
+        return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
+
     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
         """
         Can we compare values of the given dtype to our own?
@@ -6119,3 +6164,24 @@ def get_unanimous_names(*indexes: Index) -> Tuple[Label, ...]:
     name_sets = [{*ns} for ns in zip_longest(*name_tups)]
     names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
     return names
+
+
+def _unpack_nested_dtype(other: Index) -> Index:
+    """
+    When checking if our dtype is comparable with another, we need
+    to unpack CategoricalDtype to look at its categories.dtype.
+
+    Parameters
+    ----------
+    other : Index
+
+    Returns
+    -------
+    Index
+    """
+    dtype = other.dtype
+    if is_categorical_dtype(dtype):
+        # If there is ever a SparseIndex, this could get dispatched
+        #  here too.
+        return dtype.categories
+    return other
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index b223e583d0ce0..ae0126df4f088 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -452,13 +452,10 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
         target = ensure_index(target)
 
-        if isinstance(target, PeriodIndex):
-            if not self._is_comparable_dtype(target.dtype):
-                # i.e. target.freq != self.freq
-                # No matches
-                no_matches = -1 * np.ones(self.shape, dtype=np.intp)
-                return no_matches
+        if not self._should_compare(target):
+            return self._get_indexer_non_comparable(target, method, unique=True)
 
+        if isinstance(target, PeriodIndex):
             target = target._get_engine_target()  # i.e. target.asi8
             self_index = self._int64index
         else:
diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py
index 9b203e1b17517..c03c89f32f73e 100644
--- a/pandas/tests/indexes/period/test_indexing.py
+++ b/pandas/tests/indexes/period/test_indexing.py
@@ -21,6 +21,28 @@
 )
 import pandas._testing as tm
 
+dti4 = date_range("2016-01-01", periods=4)
+dti = dti4[:-1]
+rng = pd.Index(range(3))
+
+
+@pytest.fixture(
+    params=[
+        dti,
+        dti.tz_localize("UTC"),
+        dti.to_period("W"),
+        dti - dti[0],
+        rng,
+        pd.Index([1, 2, 3]),
+        pd.Index([2.0, 3.0, 4.0]),
+        pd.Index([4, 5, 6], dtype="u8"),
+        pd.IntervalIndex.from_breaks(dti4),
+    ]
+)
+def non_comparable_idx(request):
+    # All have length 3
+    return request.param
+
 
 class TestGetItem:
     def test_ellipsis(self):
@@ -438,6 +460,37 @@ def test_get_indexer_mismatched_dtype(self):
         result = pi.get_indexer_non_unique(pi2)[0]
         tm.assert_numpy_array_equal(result, expected)
 
+    def test_get_indexer_mismatched_dtype_different_length(self, non_comparable_idx):
+        # without method we aren't checking inequalities, so get all-missing
+        #  but do not raise
+        dti = date_range("2016-01-01", periods=3)
+        pi = dti.to_period("D")
+
+        other = non_comparable_idx
+
+        res = pi[:-1].get_indexer(other)
+        expected = -np.ones(other.shape, dtype=np.intp)
+        tm.assert_numpy_array_equal(res, expected)
+
+    @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
+    def test_get_indexer_mismatched_dtype_with_method(self, non_comparable_idx, method):
+        dti = date_range("2016-01-01", periods=3)
+        pi = dti.to_period("D")
+
+        other = non_comparable_idx
+
+        msg = re.escape(f"Cannot compare dtypes {pi.dtype} and {other.dtype}")
+        with pytest.raises(TypeError, match=msg):
+            pi.get_indexer(other, method=method)
+
+        for dtype in ["object", "category"]:
+            other2 = other.astype(dtype)
+            if dtype == "object" and isinstance(other, PeriodIndex):
+                continue
+            # For object dtype we are liable to get a different exception message
+            with pytest.raises(TypeError):
+                pi.get_indexer(other2, method=method)
+
     def test_get_indexer_non_unique(self):
         # GH 17717
         p1 = Period("2017-09-02")