Skip to content

REF: move MultiIndex._get_indexer casting to _maybe_promote #42109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3402,6 +3402,9 @@ def get_indexer(
if not self._index_as_unique:
raise InvalidIndexError(self._requires_unique_msg)

if len(target) == 0:
return np.array([], dtype=np.intp)

if not self._should_compare(target) and not is_interval_dtype(self.dtype):
# IntervalIndex get special treatment bc numeric scalars can be
# matched to Interval scalars
Expand All @@ -3410,9 +3413,14 @@ def get_indexer(
if is_categorical_dtype(target.dtype):
# potential fastpath
# get an indexer for unique categories then propagate to codes via take_nd
# Note: calling get_indexer instead of _get_indexer causes
# RecursionError GH#42088
categories_indexer = self._get_indexer(target.categories)
if is_categorical_dtype(self.dtype):
# Avoid RecursionError GH#42088
categories_indexer = self._get_indexer(target.categories)
else:
# get_indexer instead of _get_indexer needed for MultiIndex cases
# e.g. test_append_different_columns_types
categories_indexer = self.get_indexer(target.categories)

indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

if (not self._is_multi and self.hasnans) and target.hasnans:
Expand Down Expand Up @@ -5349,6 +5357,14 @@ def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
# TODO: may need itemsize check if we have non-64-bit Indexes
return self, other.astype(self.dtype)

elif self._is_multi and not other._is_multi:
try:
# "Type[Index]" has no attribute "from_tuples"
other = type(self).from_tuples(other) # type: ignore[attr-defined]
except (TypeError, ValueError):
# let's instead try with a straight Index
self = Index(self._values)

if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
# Reverse op so we don't need to re-implement on the subclasses
other, self = other._maybe_promote(self)
Expand Down
31 changes: 0 additions & 31 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2644,37 +2644,6 @@ def _get_partial_string_timestamp_match_key(self, key):

return key

def _get_indexer(
    self,
    target: Index,
    method: str | None = None,
    limit: int | None = None,
    tolerance=None,
) -> np.ndarray:
    """
    Compute the indexer for ``target`` against this MultiIndex.

    Parameters
    ----------
    target : Index
        Labels to look up. A target that is not a MultiIndex is first
        converted with ``MultiIndex.from_tuples``.
    method : str or None, default None
        Forwarded to the underlying ``get_indexer`` / ``_get_indexer`` call.
    limit : int or None, default None
        Forwarded to the underlying ``get_indexer`` / ``_get_indexer`` call.
    tolerance : optional
        Forwarded to the underlying ``get_indexer`` / ``_get_indexer`` call.

    Returns
    -------
    np.ndarray
        The returned ndarray is np.intp.
    """
    # Nothing to look up: hand back an empty platform-int indexer.
    if len(target) == 0:
        empty = np.array([])
        return ensure_platform_int(empty)

    needs_conversion = not isinstance(target, MultiIndex)
    if needs_conversion:
        try:
            target = MultiIndex.from_tuples(target)
        except (TypeError, ValueError):
            # The target cannot be interpreted as tuples; when no fill
            # method was requested, retry the lookup against a plain Index
            # built from this index's values.
            if method is None:
                flat_index = Index(self._values)
                return flat_index.get_indexer(
                    target, method=method, limit=limit, tolerance=tolerance
                )

            # TODO: explicitly raise here?  we only have one test that
            #  gets here, and it is checking that we raise with method="nearest"

    # Note: we only get here (in extant tests at least) with
    #  target.nlevels == self.nlevels
    return super()._get_indexer(target, method, limit, tolerance)

def get_slice_bound(
self, label: Hashable | Sequence[Hashable], side: str, kind: str | None = None
) -> int:
Expand Down