Skip to content

Commit 05d70f4

Browse files
authored
DOC: use shared docs on Index._convert_list_indexer (#15678)
CLN: push key coercion to the indexes themselves to simplify a bit
1 parent 998c801 commit 05d70f4

File tree

5 files changed

+106
-70
lines changed

5 files changed

+106
-70
lines changed

pandas/core/indexing.py

+21 -65
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries
88
from pandas.types.common import (is_integer_dtype,
99
is_integer, is_float,
10-
is_categorical_dtype,
1110
is_list_like,
1211
is_sequence,
1312
is_iterator,
@@ -1087,51 +1086,24 @@ def _getitem_iterable(self, key, axis=0):
10871086
inds, = key.nonzero()
10881087
return self.obj.take(inds, axis=axis, convert=False)
10891088
else:
1090-
if isinstance(key, Index):
1091-
keyarr = labels._convert_index_indexer(key)
1092-
else:
1093-
keyarr = _asarray_tuplesafe(key)
1094-
keyarr = labels._convert_arr_indexer(keyarr)
1095-
1096-
if is_categorical_dtype(labels):
1097-
keyarr = labels._shallow_copy(keyarr)
1098-
1099-
# have the index handle the indexer and possibly return
1100-
# an indexer or raising
1101-
indexer = labels._convert_list_indexer(keyarr, kind=self.name)
1089+
# Have the index compute an indexer or return None
1090+
# if it cannot handle
1091+
indexer, keyarr = labels._convert_listlike_indexer(
1092+
key, kind=self.name)
11021093
if indexer is not None:
11031094
return self.obj.take(indexer, axis=axis)
11041095

1105-
# this is not the most robust, but...
1106-
if (isinstance(labels, MultiIndex) and len(keyarr) and
1107-
not isinstance(keyarr[0], tuple)):
1108-
level = 0
1109-
else:
1110-
level = None
1111-
11121096
# existing labels are unique and indexer are unique
11131097
if labels.is_unique and Index(keyarr).is_unique:
11141098

11151099
try:
1116-
result = self.obj.reindex_axis(keyarr, axis=axis,
1117-
level=level)
1118-
1119-
# this is an error as we are trying to find
1120-
# keys in a multi-index that don't exist
1121-
if isinstance(labels, MultiIndex) and level is not None:
1122-
if (hasattr(result, 'ndim') and
1123-
not np.prod(result.shape) and len(keyarr)):
1124-
raise KeyError("cannot index a multi-index axis "
1125-
"with these keys")
1126-
1127-
return result
1128-
1100+
return self.obj.reindex_axis(keyarr, axis=axis)
11291101
except AttributeError:
11301102

11311103
# Series
11321104
if axis != 0:
11331105
raise AssertionError('axis must be 0')
1134-
return self.obj.reindex(keyarr, level=level)
1106+
return self.obj.reindex(keyarr)
11351107

11361108
# existing labels are non-unique
11371109
else:
@@ -1225,49 +1197,33 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
12251197

12261198
if is_nested_tuple(obj, labels):
12271199
return labels.get_locs(obj)
1200+
12281201
elif is_list_like_indexer(obj):
1202+
12291203
if is_bool_indexer(obj):
12301204
obj = check_bool_indexer(labels, obj)
12311205
inds, = obj.nonzero()
12321206
return inds
12331207
else:
1234-
if isinstance(obj, Index):
1235-
# want Index objects to pass through untouched
1236-
objarr = obj
1237-
else:
1238-
objarr = _asarray_tuplesafe(obj)
12391208

1240-
# The index may want to handle a list indexer differently
1241-
# by returning an indexer or raising
1242-
indexer = labels._convert_list_indexer(objarr, kind=self.name)
1209+
# Have the index compute an indexer or return None
1210+
# if it cannot handle
1211+
indexer, objarr = labels._convert_listlike_indexer(
1212+
obj, kind=self.name)
12431213
if indexer is not None:
12441214
return indexer
12451215

1246-
# this is not the most robust, but...
1247-
if (isinstance(labels, MultiIndex) and
1248-
not isinstance(objarr[0], tuple)):
1249-
level = 0
1250-
_, indexer = labels.reindex(objarr, level=level)
1216+
# unique index
1217+
if labels.is_unique:
1218+
indexer = check = labels.get_indexer(objarr)
12511219

1252-
# take all
1253-
if indexer is None:
1254-
indexer = np.arange(len(labels))
1255-
1256-
check = labels.levels[0].get_indexer(objarr)
1220+
# non-unique (dups)
12571221
else:
1258-
level = None
1259-
1260-
# unique index
1261-
if labels.is_unique:
1262-
indexer = check = labels.get_indexer(objarr)
1263-
1264-
# non-unique (dups)
1265-
else:
1266-
(indexer,
1267-
missing) = labels.get_indexer_non_unique(objarr)
1268-
# 'indexer' has dupes, create 'check' using 'missing'
1269-
check = np.zeros_like(objarr)
1270-
check[missing] = -1
1222+
(indexer,
1223+
missing) = labels.get_indexer_non_unique(objarr)
1224+
# 'indexer' has dupes, create 'check' using 'missing'
1225+
check = np.zeros_like(objarr)
1226+
check[missing] = -1
12711227

12721228
mask = check == -1
12731229
if mask.any():

pandas/indexes/base.py

+37
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,27 @@ def is_int(v):
13391339

13401340
return indexer
13411341

1342+
def _convert_listlike_indexer(self, keyarr, kind=None):
1343+
"""
1344+
Parameters
1345+
----------
1346+
keyarr : list-like
1347+
Indexer to convert.
1348+
1349+
Returns
1350+
-------
1351+
tuple (indexer, keyarr)
1352+
indexer is an ndarray or None if cannot convert
1353+
keyarr are tuple-safe keys
1354+
"""
1355+
if isinstance(keyarr, Index):
1356+
keyarr = self._convert_index_indexer(keyarr)
1357+
else:
1358+
keyarr = self._convert_arr_indexer(keyarr)
1359+
1360+
indexer = self._convert_list_indexer(keyarr, kind=kind)
1361+
return indexer, keyarr
1362+
13421363
_index_shared_docs['_convert_arr_indexer'] = """
13431364
Convert an array-like indexer to the appropriate dtype.
13441365
@@ -1354,6 +1375,7 @@ def is_int(v):
13541375

13551376
@Appender(_index_shared_docs['_convert_arr_indexer'])
13561377
def _convert_arr_indexer(self, keyarr):
1378+
keyarr = _asarray_tuplesafe(keyarr)
13571379
return keyarr
13581380

13591381
_index_shared_docs['_convert_index_indexer'] = """
@@ -1373,6 +1395,21 @@ def _convert_arr_indexer(self, keyarr):
13731395
def _convert_index_indexer(self, keyarr):
13741396
return keyarr
13751397

1398+
_index_shared_docs['_convert_list_indexer'] = """
1399+
Convert a list-like indexer to the appropriate dtype.
1400+
1401+
Parameters
1402+
----------
1403+
keyarr : Index (or sub-class)
1404+
Indexer to convert.
1405+
kind : iloc, ix, loc, optional
1406+
1407+
Returns
1408+
-------
1409+
positional indexer or None
1410+
"""
1411+
1412+
@Appender(_index_shared_docs['_convert_list_indexer'])
13761413
def _convert_list_indexer(self, keyarr, kind=None):
13771414
"""
13781415
passed a key that is tuplesafe that is integer based

pandas/indexes/category.py

+14 -5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import pandas.core.base as base
1919
import pandas.core.missing as missing
2020
import pandas.indexes.base as ibase
21+
from pandas.core.common import _asarray_tuplesafe
22+
2123
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
2224
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))
2325

@@ -458,19 +460,26 @@ def get_indexer_non_unique(self, target):
458460
codes = self.categories.get_indexer(target)
459461
return self._engine.get_indexer_non_unique(codes)
460462

463+
@Appender(_index_shared_docs['_convert_list_indexer'])
461464
def _convert_list_indexer(self, keyarr, kind=None):
462-
"""
463-
we are passed a list indexer.
464-
Return our indexer or raise if all of the values are not included in
465-
the categories
466-
"""
465+
# Return our indexer or raise if all of the values are not included in
466+
# the categories
467467
codes = self.categories.get_indexer(keyarr)
468468
if (codes == -1).any():
469469
raise KeyError("a list-indexer must only include values that are "
470470
"in the categories")
471471

472472
return None
473473

474+
@Appender(_index_shared_docs['_convert_arr_indexer'])
475+
def _convert_arr_indexer(self, keyarr):
476+
keyarr = _asarray_tuplesafe(keyarr)
477+
return self._shallow_copy(keyarr)
478+
479+
@Appender(_index_shared_docs['_convert_index_indexer'])
480+
def _convert_index_indexer(self, keyarr):
481+
return self._shallow_copy(keyarr)
482+
474483
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
475484
def take(self, indices, axis=0, allow_fill=True,
476485
fill_value=None, **kwargs):

pandas/indexes/multi.py

+33
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,39 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):
15681568

15691569
return new_index, indexer
15701570

1571+
def _convert_listlike_indexer(self, keyarr, kind=None):
1572+
"""
1573+
Parameters
1574+
----------
1575+
keyarr : list-like
1576+
Indexer to convert.
1577+
1578+
Returns
1579+
-------
1580+
tuple (indexer, keyarr)
1581+
indexer is an ndarray or None if cannot convert
1582+
keyarr are tuple-safe keys
1583+
"""
1584+
indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer(
1585+
keyarr, kind=kind)
1586+
1587+
# are we indexing a specific level
1588+
if indexer is None and len(keyarr) and not isinstance(keyarr[0],
1589+
tuple):
1590+
level = 0
1591+
_, indexer = self.reindex(keyarr, level=level)
1592+
1593+
# take all
1594+
if indexer is None:
1595+
indexer = np.arange(len(self))
1596+
1597+
check = self.levels[0].get_indexer(keyarr)
1598+
mask = check == -1
1599+
if mask.any():
1600+
raise KeyError('%s not in index' % keyarr[mask])
1601+
1602+
return indexer, keyarr
1603+
15711604
@Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
15721605
def get_indexer(self, target, method=None, limit=None, tolerance=None):
15731606
method = missing.clean_reindex_fill_method(method)

pandas/indexes/numeric.py

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def _convert_arr_indexer(self, keyarr):
203203
# Cast the indexer to uint64 if possible so
204204
# that the values returned from indexing are
205205
# also uint64.
206+
keyarr = _asarray_tuplesafe(keyarr)
206207
if is_integer_dtype(keyarr):
207208
return _asarray_tuplesafe(keyarr, dtype=np.uint64)
208209
return keyarr

0 commit comments

Comments
 (0)