Skip to content

Commit dbf5d6a

Browse files
jbrockmendel authored and yeshsurya committed
TYP: Index.reindex (pandas-dev#40950)
1 parent 40b6104 commit dbf5d6a

File tree

2 files changed

+43
-17
lines changed

2 files changed

+43
-17
lines changed

pandas/core/indexes/base.py

+12 -7
Original file line number | Diff line number | Diff line change
@@ -3762,7 +3762,9 @@ def _validate_can_reindex(self, indexer: np.ndarray) -> None:
37623762
if not self._index_as_unique and len(indexer):
37633763
raise ValueError("cannot reindex from a duplicate axis")
37643764

3765-
def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
3765+
def reindex(
3766+
self, target, method=None, level=None, limit=None, tolerance=None
3767+
) -> tuple[Index, np.ndarray | None]:
37663768
"""
37673769
Create index with target's values.
37683770
@@ -3774,7 +3776,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
37743776
-------
37753777
new_index : pd.Index
37763778
Resulting index.
3777-
indexer : np.ndarray or None
3779+
indexer : np.ndarray[np.intp] or None
37783780
Indices of output values in original index.
37793781
"""
37803782
# GH6552: preserve names when reindexing to non-named target
@@ -3815,7 +3817,9 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
38153817

38163818
return target, indexer
38173819

3818-
def _reindex_non_unique(self, target):
3820+
def _reindex_non_unique(
3821+
self, target: Index
3822+
) -> tuple[Index, np.ndarray, np.ndarray | None]:
38193823
"""
38203824
Create a new index with target's values (move/add/delete values as
38213825
necessary) use with non-unique Index and a possibly non-unique target.
@@ -3828,8 +3832,9 @@ def _reindex_non_unique(self, target):
38283832
-------
38293833
new_index : pd.Index
38303834
Resulting index.
3831-
indexer : np.ndarray or None
3835+
indexer : np.ndarray[np.intp]
38323836
Indices of output values in original index.
3837+
new_indexer : np.ndarray[np.intp] or None
38333838
38343839
"""
38353840
target = ensure_index(target)
@@ -3858,13 +3863,13 @@ def _reindex_non_unique(self, target):
38583863
# GH#38906
38593864
if not len(self):
38603865

3861-
new_indexer = np.arange(0)
3866+
new_indexer = np.arange(0, dtype=np.intp)
38623867

38633868
# a unique indexer
38643869
elif target.is_unique:
38653870

38663871
# see GH5553, make sure we use the right indexer
3867-
new_indexer = np.arange(len(indexer))
3872+
new_indexer = np.arange(len(indexer), dtype=np.intp)
38683873
new_indexer[cur_indexer] = np.arange(len(cur_labels))
38693874
new_indexer[missing_indexer] = -1
38703875

@@ -3876,7 +3881,7 @@ def _reindex_non_unique(self, target):
38763881
indexer[~check] = -1
38773882

38783883
# reset the new indexer to account for the new size
3879-
new_indexer = np.arange(len(self.take(indexer)))
3884+
new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
38803885
new_indexer[~check] = -1
38813886

38823887
if isinstance(self, ABCMultiIndex):

pandas/core/indexes/category.py

+31 -10
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from pandas._config import get_option
1212

1313
from pandas._libs import index as libindex
14+
from pandas._libs.lib import no_default
1415
from pandas._typing import (
1516
ArrayLike,
1617
Dtype,
@@ -233,6 +234,22 @@ def __new__(
233234

234235
# --------------------------------------------------------------------
235236

237+
@doc(Index._shallow_copy)
238+
def _shallow_copy(
239+
self,
240+
values: Categorical,
241+
name: Hashable = no_default,
242+
) -> CategoricalIndex:
243+
name = self._name if name is no_default else name
244+
245+
if values is not None:
246+
# In tests we only get here with Categorical objects that
247+
# have matching .ordered, and values.categories a subset of
248+
# our own. However we do _not_ have a dtype match in general.
249+
values = Categorical(values, dtype=self.dtype)
250+
251+
return super()._shallow_copy(values=values, name=name)
252+
236253
def _is_dtype_compat(self, other) -> Categorical:
237254
"""
238255
*this is an internal non-public method*
@@ -369,6 +386,15 @@ def fillna(self, value, downcast=None):
369386

370387
return type(self)._simple_new(cat, name=self.name)
371388

389+
@doc(Index.unique)
390+
def unique(self, level=None):
391+
if level is not None:
392+
self._validate_index_level(level)
393+
result = self._values.unique()
394+
# Use _simple_new instead of _shallow_copy to ensure we keep dtype
395+
# of result, not self.
396+
return type(self)._simple_new(result, name=self.name)
397+
372398
def reindex(
373399
self, target, method=None, level=None, limit=None, tolerance=None
374400
) -> tuple[Index, np.ndarray | None]:
@@ -431,8 +457,8 @@ def reindex(
431457
# in which case we are going to conform to the passed Categorical
432458
new_target = np.asarray(new_target)
433459
if is_categorical_dtype(target):
434-
cat = Categorical(new_target, dtype=target.dtype)
435-
new_target = type(self)._simple_new(cat, name=self.name)
460+
new_target = Categorical(new_target, dtype=target.dtype)
461+
new_target = type(self)._simple_new(new_target, name=self.name)
436462
else:
437463
new_target = Index(new_target, name=self.name)
438464

@@ -482,23 +508,18 @@ def _get_indexer(
482508
limit: int | None = None,
483509
tolerance=None,
484510
) -> np.ndarray:
485-
# returned ndarray is np.intp
486511

487512
if self.equals(target):
488513
return np.arange(len(self), dtype="intp")
489514

490515
return self._get_indexer_non_unique(target._values)[0]
491516

492517
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
493-
def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]:
494-
# both returned ndarrays are np.intp
518+
def get_indexer_non_unique(self, target):
495519
target = ibase.ensure_index(target)
496520
return self._get_indexer_non_unique(target._values)
497521

498-
def _get_indexer_non_unique(
499-
self, values: ArrayLike
500-
) -> tuple[np.ndarray, np.ndarray]:
501-
# both returned ndarrays are np.intp
522+
def _get_indexer_non_unique(self, values: ArrayLike):
502523
"""
503524
get_indexer_non_unique but after unwrapping the target Index object.
504525
"""
@@ -517,7 +538,7 @@ def _get_indexer_non_unique(
517538
codes = self.categories.get_indexer(values)
518539

519540
indexer, missing = self._engine.get_indexer_non_unique(codes)
520-
return ensure_platform_int(indexer), ensure_platform_int(missing)
541+
return ensure_platform_int(indexer), missing
521542

522543
@doc(Index._convert_list_indexer)
523544
def _convert_list_indexer(self, keyarr):

0 commit comments

Comments
 (0)