From 7598d8649dbc42dd36668d78a5f1fa0370c09aec Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Jun 2021 09:17:01 -0700 Subject: [PATCH 1/2] REF: de-duplicate MultiIndex.reindex --- pandas/core/indexes/base.py | 29 +++++++++++++++---- pandas/core/indexes/multi.py | 56 ++++++------------------------------ 2 files changed, 32 insertions(+), 53 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6219aa07478d7..54297d8a31e2b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3762,14 +3762,24 @@ def reindex( target = ensure_has_len(target) # target may be an iterator if not isinstance(target, Index) and len(target) == 0: - target = self[:0] + if level is not None and self._is_multi: + idx = self.levels[level] + else: + idx = self + target = idx[:0] else: target = ensure_index(target) if level is not None: if method is not None: raise TypeError("Fill method not supported if level passed") - _, indexer, _ = self._join_level(target, level, how="right") + + # TODO: tests where passing `keep_order=not self._is_multi` + # makes a difference for non-MultiIndex case + target, indexer, _ = self._join_level( + target, level, how="right", keep_order=not self._is_multi + ) + else: if self.equals(target): indexer = None @@ -3778,6 +3788,8 @@ def reindex( indexer = self.get_indexer( target, method=method, limit=limit, tolerance=tolerance ) + elif self._is_multi: + raise ValueError("cannot handle a non-unique multi-index!") else: if method is not None or limit is not None: raise ValueError( @@ -3786,11 +3798,18 @@ def reindex( ) indexer, _ = self.get_indexer_non_unique(target) + target = self._wrap_reindex_result(target, indexer, preserve_names) + return target, indexer + + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): + target = self._maybe_preserve_names(target, preserve_names) + return target + + def _maybe_preserve_names(self, target: Index, preserve_names: bool): if preserve_names and target.nlevels == 1 and target.name != self.name: - target = target.copy() + target = target.copy(deep=False) target.name = self.name - - return target, indexer + return target @final def _reindex_non_unique( diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d9e9859ea10ed..0a435f83cc489 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2476,51 +2476,7 @@ def sortlevel( return new_index, indexer - def reindex( - self, target, method=None, level=None, limit=None, tolerance=None - ) -> tuple[MultiIndex, np.ndarray | None]: - """ - Create index with target's values (move/add/delete values as necessary) - - Returns - ------- - new_index : pd.MultiIndex - Resulting index - indexer : np.ndarray[np.intp] or None - Indices of output values in original index. - - """ - # GH6552: preserve names when reindexing to non-named target - # (i.e. neither Index nor Series). - preserve_names = not hasattr(target, "names") - - if level is not None: - if method is not None: - raise TypeError("Fill method not supported if level passed") - - # GH7774: preserve dtype/tz if target is empty and not an Index. - # target may be an iterator - target = ibase.ensure_has_len(target) - if len(target) == 0 and not isinstance(target, Index): - idx = self.levels[level] - target = idx[:0] - else: - target = ensure_index(target) - target, indexer, _ = self._join_level( - target, level, how="right", keep_order=False - ) - else: - target = ensure_index(target) - if self.equals(target): - indexer = None - else: - if self.is_unique: - indexer = self.get_indexer( - target, method=method, limit=limit, tolerance=tolerance - ) - else: - raise ValueError("cannot handle a non-unique multi-index!") - + def _wrap_reindex_result(self, target, indexer, preserve_names: bool): if not isinstance(target, MultiIndex): if indexer is None: target = self @@ -2531,7 +2487,12 @@ def reindex( target = MultiIndex.from_tuples(target) except TypeError: # not all tuples, see test_constructor_dict_multiindex_reindex_flat - return target, indexer + return target + + target = self._maybe_preserve_names(target, preserve_names) + return target + + def _maybe_preserve_names(self, target: Index, preserve_names: bool): if ( preserve_names and target.nlevels == self.nlevels @@ -2539,8 +2500,7 @@ def reindex( ): target = target.copy(deep=False) target.names = self.names - - return target, indexer + return target # -------------------------------------------------------------------- # Indexing Methods From 740a85aee4b038cec6ffc8405439b09d53ef7dac Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Jun 2021 10:10:50 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 54297d8a31e2b..9bd2051a9a07f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3763,7 +3763,8 @@ def reindex( if not isinstance(target, Index) and len(target) == 0: if level is not None and self._is_multi: - idx = self.levels[level] + # "Index" has no attribute "levels"; maybe "nlevels"? + idx = self.levels[level] # type: ignore[attr-defined] else: idx = self target = idx[:0]