Skip to content

Commit 7d0a98e

Browse files
ri938 authored and jreback committed
Bug issue 16819 Index.get_indexer_not_unique inconsistent return types vs get_indexer (#16826)
1 parent cc5d20f commit 7d0a98e

File tree

5 files changed

+19
-8
lines changed

5 files changed

+19
-8
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Backwards incompatible API changes
5353
- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
5454
- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
5555
- Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
56+
- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)
5657

5758
.. _whatsnew_0210.api:
5859

pandas/core/groupby.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -896,8 +896,9 @@ def reset_identity(values):
896896
# we can't reindex, so we resort to this
897897
# GH 14776
898898
if isinstance(ax, MultiIndex) and not ax.is_unique:
899-
result = result.take(result.index.get_indexer_for(
900-
ax.values).unique(), axis=self.axis)
899+
indexer = algorithms.unique1d(
900+
result.index.get_indexer_for(ax.values))
901+
result = result.take(indexer, axis=self.axis)
901902
else:
902903
result = result.reindex_axis(ax, axis=self.axis)
903904

pandas/core/indexes/base.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2256,8 +2256,8 @@ def intersection(self, other):
22562256
indexer = indexer.take((indexer != -1).nonzero()[0])
22572257
except:
22582258
# duplicates
2259-
indexer = Index(other._values).get_indexer_non_unique(
2260-
self._values)[0].unique()
2259+
indexer = algos.unique1d(
2260+
Index(other._values).get_indexer_non_unique(self._values)[0])
22612261
indexer = indexer[indexer != -1]
22622262

22632263
taken = other.take(indexer)
@@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target):
27042704
tgt_values = target._values
27052705

27062706
indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
2707-
return Index(indexer), missing
2707+
return indexer, missing
27082708

27092709
def get_indexer_for(self, target, **kwargs):
27102710
"""
@@ -2942,7 +2942,6 @@ def _reindex_non_unique(self, target):
29422942
else:
29432943

29442944
# need to retake to have the same size as the indexer
2945-
indexer = indexer.values
29462945
indexer[~check] = 0
29472946

29482947
# reset the new indexer to account for the new size

pandas/tests/indexes/test_base.py

+11
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,17 @@ def test_get_indexer_strings(self):
11311131
with pytest.raises(TypeError):
11321132
idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2)
11331133

1134+
def test_get_indexer_consistency(self):
1135+
# See GH 16819
1136+
for name, index in self.indices.items():
1137+
indexer = index.get_indexer(index[0:2])
1138+
assert isinstance(indexer, np.ndarray)
1139+
assert indexer.dtype == np.intp
1140+
1141+
indexer, _ = index.get_indexer_non_unique(index[0:2])
1142+
assert isinstance(indexer, np.ndarray)
1143+
assert indexer.dtype == np.intp
1144+
11341145
def test_get_loc(self):
11351146
idx = pd.Index([0, 1, 2])
11361147
all_methods = [None, 'pad', 'backfill', 'nearest']

pandas/tests/indexes/test_category.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,7 @@ def test_reindexing(self):
386386
expected = oidx.get_indexer_non_unique(finder)[0]
387387

388388
actual = ci.get_indexer(finder)
389-
tm.assert_numpy_array_equal(
390-
expected.values, actual, check_dtype=False)
389+
tm.assert_numpy_array_equal(expected, actual)
391390

392391
def test_reindex_dtype(self):
393392
c = CategoricalIndex(['a', 'b', 'c', 'a'])

0 commit comments

Comments (0)