diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 88481160a2860..40215ea87f978 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3312,7 +3312,8 @@ def _get_indexer(
             tolerance = self._convert_tolerance(tolerance, target)
 
         if not is_dtype_equal(self.dtype, target.dtype):
-            dtype = find_common_type([self.dtype, target.dtype])
+            dtype = self._find_common_type_compat(target)
+
             this = self.astype(dtype, copy=False)
             target = target.astype(dtype, copy=False)
             return this.get_indexer(
@@ -5075,15 +5076,7 @@ def get_indexer_non_unique(self, target):
         if not is_dtype_equal(self.dtype, target.dtype):
             # TODO: if object, could use infer_dtype to preempt costly
             # conversion if still non-comparable?
-            dtype = find_common_type([self.dtype, target.dtype])
-            if (
-                dtype.kind in ["i", "u"]
-                and is_categorical_dtype(target.dtype)
-                and target.hasnans
-            ):
-                # FIXME: find_common_type incorrect with Categorical GH#38240
-                # FIXME: some cases where float64 cast can be lossy?
-                dtype = np.dtype(np.float64)
+            dtype = self._find_common_type_compat(target)
 
             this = self.astype(dtype, copy=False)
             that = target.astype(dtype, copy=False)
@@ -5196,6 +5189,20 @@ def _maybe_promote(self, other: Index):
 
         return self, other
 
+    def _find_common_type_compat(self, target: Index) -> DtypeObj:
+        """
+        Implementation of find_common_type that adjusts for Index-specific
+        special cases.
+        """
+        dtype = find_common_type([self.dtype, target.dtype])
+        if dtype.kind in ["i", "u"]:
+            # TODO: what about reversed with self being categorical?
+            if is_categorical_dtype(target.dtype) and target.hasnans:
+                # FIXME: find_common_type incorrect with Categorical GH#38240
+                # FIXME: some cases where float64 cast can be lossy?
+                dtype = np.dtype(np.float64)
+        return dtype
+
     @final
     def _should_compare(self, other: Index) -> bool:
         """
diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index 6068166cb8618..5b31776301f7b 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -212,17 +212,25 @@ def test_categories_assignments_wrong_length_raises(self, new_categories):
     # Combinations of missing/unique
     @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
     @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
-    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
+    @pytest.mark.parametrize("dtype", [None, "category", "key"])
+    def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype):
         # GH 21448
         key = key_class(key_values, categories=range(1, 5))
+
+        if dtype == "key":
+            dtype = key.dtype
+
         # Test for flat index and CategoricalIndex with same/different cats:
-        for dtype in [None, "category", key.dtype]:
-            idx = Index(idx_values, dtype=dtype)
-            expected, exp_miss = idx.get_indexer_non_unique(key_values)
-            result, res_miss = idx.get_indexer_non_unique(key)
+        idx = Index(idx_values, dtype=dtype)
+        expected, exp_miss = idx.get_indexer_non_unique(key_values)
+        result, res_miss = idx.get_indexer_non_unique(key)
+
+        tm.assert_numpy_array_equal(expected, result)
+        tm.assert_numpy_array_equal(exp_miss, res_miss)
 
-            tm.assert_numpy_array_equal(expected, result)
-            tm.assert_numpy_array_equal(exp_miss, res_miss)
+        exp_unique = idx.unique().get_indexer(key_values)
+        res_unique = idx.unique().get_indexer(key)
+        tm.assert_numpy_array_equal(res_unique, exp_unique)
 
     def test_where_unobserved_nan(self):
         ser = Series(Categorical(["a", "b"]))