From 31c372e575802ce794216561344734ac67f6ceb6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Jun 2021 09:35:45 -0700 Subject: [PATCH] REF: move MultiIndex._get_indexer casting to _maybe_promote --- pandas/core/indexes/base.py | 22 +++++++++++++++++++--- pandas/core/indexes/multi.py | 31 ------------------------------- 2 files changed, 19 insertions(+), 34 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 88fded29810ff..eca584239b33e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3402,6 +3402,9 @@ def get_indexer( if not self._index_as_unique: raise InvalidIndexError(self._requires_unique_msg) + if len(target) == 0: + return np.array([], dtype=np.intp) + if not self._should_compare(target) and not is_interval_dtype(self.dtype): # IntervalIndex get special treatment bc numeric scalars can be # matched to Interval scalars @@ -3410,9 +3413,14 @@ def get_indexer( if is_categorical_dtype(target.dtype): # potential fastpath # get an indexer for unique categories then propagate to codes via take_nd - # Note: calling get_indexer instead of _get_indexer causes - # RecursionError GH#42088 - categories_indexer = self._get_indexer(target.categories) + if is_categorical_dtype(self.dtype): + # Avoid RecursionError GH#42088 + categories_indexer = self._get_indexer(target.categories) + else: + # get_indexer instead of _get_indexer needed for MultiIndex cases + # e.g. test_append_different_columns_types + categories_indexer = self.get_indexer(target.categories) + indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1) if (not self._is_multi and self.hasnans) and target.hasnans: @@ -5343,6 +5351,14 @@ def _maybe_promote(self, other: Index) -> tuple[Index, Index]: # TODO: may need itemsize check if we have non-64-bit Indexes return self, other.astype(self.dtype) + elif self._is_multi and not other._is_multi: + try: + # "Type[Index]" has no attribute "from_tuples" + other = type(self).from_tuples(other) # type: ignore[attr-defined] + except (TypeError, ValueError): + # let's instead try with a straight Index + self = Index(self._values) + if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): # Reverse op so we dont need to re-implement on the subclasses other, self = other._maybe_promote(self) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f1f418453c68b..83a4b6a8d56c2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2644,37 +2644,6 @@ def _get_partial_string_timestamp_match_key(self, key): return key - def _get_indexer( - self, - target: Index, - method: str | None = None, - limit: int | None = None, - tolerance=None, - ) -> np.ndarray: - # returned ndarray is np.intp - - # empty indexer - if not len(target): - return ensure_platform_int(np.array([])) - - if not isinstance(target, MultiIndex): - try: - target = MultiIndex.from_tuples(target) - except (TypeError, ValueError): - - # let's instead try with a straight Index - if method is None: - return Index(self._values).get_indexer( - target, method=method, limit=limit, tolerance=tolerance - ) - - # TODO: explicitly raise here? we only have one test that - # gets here, and it is checking that we raise with method="nearest" - - # Note: we only get here (in extant tests at least) with - # target.nlevels == self.nlevels - return super()._get_indexer(target, method, limit, tolerance) - def get_slice_bound( self, label: Hashable | Sequence[Hashable], side: str, kind: str | None = None ) -> int: