Skip to content

Commit 5fbb683

Browse files
toobaz authored and jreback committed
De-duplicate code for indexing with list-likes of keys (#21503)
1 parent c2da06c commit 5fbb683

File tree

3 files changed

+116
-103
lines changed

3 files changed

+116
-103
lines changed

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2724,7 +2724,8 @@ def _getitem_array(self, key):
27242724
indexer = key.nonzero()[0]
27252725
return self._take(indexer, axis=0)
27262726
else:
2727-
indexer = self.loc._convert_to_indexer(key, axis=1)
2727+
indexer = self.loc._convert_to_indexer(key, axis=1,
2728+
raise_missing=True)
27282729
return self._take(indexer, axis=1)
27292730

27302731
def _getitem_multilevel(self, key):

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3634,7 +3634,7 @@ def _reindex_non_unique(self, target):
36343634
else:
36353635

36363636
# need to retake to have the same size as the indexer
3637-
indexer[~check] = 0
3637+
indexer[~check] = -1
36383638

36393639
# reset the new indexer to account for the new size
36403640
new_indexer = np.arange(len(self.take(indexer)))

pandas/core/indexing.py

+113-101
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,8 @@ def _align_series(self, indexer, ser, multiindex_indexer=False):
688688
if isinstance(indexer, tuple):
689689

690690
# flatten np.ndarray indexers
691-
ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i
691+
def ravel(i):
692+
return i.ravel() if isinstance(i, np.ndarray) else i
692693
indexer = tuple(map(ravel, indexer))
693694

694695
aligners = [not com.is_null_slice(idx) for idx in indexer]
@@ -925,33 +926,10 @@ def _multi_take(self, tup):
925926
""" create the reindex map for our objects, raise the _exception if we
926927
can't create the indexer
927928
"""
928-
try:
929-
o = self.obj
930-
d = {}
931-
for key, axis in zip(tup, o._AXIS_ORDERS):
932-
ax = o._get_axis(axis)
933-
# Have the index compute an indexer or return None
934-
# if it cannot handle:
935-
indexer, keyarr = ax._convert_listlike_indexer(key,
936-
kind=self.name)
937-
# We only act on all found values:
938-
if indexer is not None and (indexer != -1).all():
939-
self._validate_read_indexer(key, indexer, axis)
940-
d[axis] = (ax[indexer], indexer)
941-
continue
942-
943-
# If we are trying to get actual keys from empty Series, we
944-
# patiently wait for a KeyError later on - otherwise, convert
945-
if len(ax) or not len(key):
946-
key = self._convert_for_reindex(key, axis)
947-
indexer = ax.get_indexer_for(key)
948-
keyarr = ax.reindex(keyarr)[0]
949-
self._validate_read_indexer(keyarr, indexer,
950-
o._get_axis_number(axis))
951-
d[axis] = (keyarr, indexer)
952-
return o._reindex_with_indexers(d, copy=True, allow_dups=True)
953-
except (KeyError, IndexingError) as detail:
954-
raise self._exception(detail)
929+
o = self.obj
930+
d = {axis: self._get_listlike_indexer(key, axis)
931+
for (key, axis) in zip(tup, o._AXIS_ORDERS)}
932+
return o._reindex_with_indexers(d, copy=True, allow_dups=True)
955933

956934
def _convert_for_reindex(self, key, axis=None):
957935
return key
@@ -1124,7 +1102,88 @@ def _getitem_axis(self, key, axis=None):
11241102

11251103
return self._get_label(key, axis=axis)
11261104

1105+
def _get_listlike_indexer(self, key, axis, raise_missing=False):
1106+
"""
1107+
Transform a list-like of keys into a new index and an indexer.
1108+
1109+
Parameters
1110+
----------
1111+
key : list-like
1112+
Target labels
1113+
axis: int
1114+
Dimension on which the indexing is being made
1115+
raise_missing: bool
1116+
Whether to raise a KeyError if some labels are not found. Will be
1117+
removed in the future, and then this method will always behave as
1118+
if raise_missing=True.
1119+
1120+
Raises
1121+
------
1122+
KeyError
1123+
If at least one key was requested but none was found, and
1124+
raise_missing=True.
1125+
1126+
Returns
1127+
-------
1128+
keyarr: Index
1129+
New index (coinciding with 'key' if the axis is unique)
1130+
values : array-like
1131+
An indexer for the return object; -1 denotes keys not found
1132+
"""
1133+
o = self.obj
1134+
ax = o._get_axis(axis)
1135+
1136+
# Have the index compute an indexer or return None
1137+
# if it cannot handle:
1138+
indexer, keyarr = ax._convert_listlike_indexer(key,
1139+
kind=self.name)
1140+
# We only act on all found values:
1141+
if indexer is not None and (indexer != -1).all():
1142+
self._validate_read_indexer(key, indexer, axis,
1143+
raise_missing=raise_missing)
1144+
return ax[indexer], indexer
1145+
1146+
if ax.is_unique:
1147+
# If we are trying to get actual keys from empty Series, we
1148+
# patiently wait for a KeyError later on - otherwise, convert
1149+
if len(ax) or not len(key):
1150+
key = self._convert_for_reindex(key, axis)
1151+
indexer = ax.get_indexer_for(key)
1152+
keyarr = ax.reindex(keyarr)[0]
1153+
else:
1154+
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1155+
1156+
self._validate_read_indexer(keyarr, indexer,
1157+
o._get_axis_number(axis),
1158+
raise_missing=raise_missing)
1159+
return keyarr, indexer
1160+
11271161
def _getitem_iterable(self, key, axis=None):
1162+
"""
1163+
Index current object with an iterable key (which can be a boolean
1164+
indexer, or a collection of keys).
1165+
1166+
Parameters
1167+
----------
1168+
key : iterable
1169+
Target labels, or boolean indexer
1170+
axis: int, default None
1171+
Dimension on which the indexing is being made
1172+
1173+
Raises
1174+
------
1175+
KeyError
1176+
If no key was found. Will change in the future to raise if not all
1177+
keys were found.
1178+
IndexingError
1179+
If the boolean indexer is unalignable with the object being
1180+
indexed.
1181+
1182+
Returns
1183+
-------
1184+
scalar, DataFrame, or Series: indexed value(s).
1185+
"""
1186+
11281187
if axis is None:
11291188
axis = self.axis or 0
11301189

@@ -1133,54 +1192,18 @@ def _getitem_iterable(self, key, axis=None):
11331192
labels = self.obj._get_axis(axis)
11341193

11351194
if com.is_bool_indexer(key):
1195+
# A boolean indexer
11361196
key = check_bool_indexer(labels, key)
11371197
inds, = key.nonzero()
11381198
return self.obj._take(inds, axis=axis)
11391199
else:
1140-
# Have the index compute an indexer or return None
1141-
# if it cannot handle; we only act on all found values
1142-
indexer, keyarr = labels._convert_listlike_indexer(
1143-
key, kind=self.name)
1144-
if indexer is not None and (indexer != -1).all():
1145-
self._validate_read_indexer(key, indexer, axis)
1146-
return self.obj.take(indexer, axis=axis)
1147-
1148-
ax = self.obj._get_axis(axis)
1149-
# existing labels are unique and indexer are unique
1150-
if labels.is_unique and Index(keyarr).is_unique:
1151-
indexer = ax.get_indexer_for(key)
1152-
self._validate_read_indexer(key, indexer, axis)
1153-
1154-
d = {axis: [ax.reindex(keyarr)[0], indexer]}
1155-
return self.obj._reindex_with_indexers(d, copy=True,
1156-
allow_dups=True)
1157-
1158-
# existing labels are non-unique
1159-
else:
1160-
1161-
# reindex with the specified axis
1162-
if axis + 1 > self.obj.ndim:
1163-
raise AssertionError("invalid indexing error with "
1164-
"non-unique index")
1165-
1166-
new_target, indexer, new_indexer = labels._reindex_non_unique(
1167-
keyarr)
1168-
1169-
if new_indexer is not None:
1170-
result = self.obj._take(indexer[indexer != -1], axis=axis)
1171-
1172-
self._validate_read_indexer(key, new_indexer, axis)
1173-
result = result._reindex_with_indexers(
1174-
{axis: [new_target, new_indexer]},
1175-
copy=True, allow_dups=True)
1200+
# A collection of keys
1201+
keyarr, indexer = self._get_listlike_indexer(key, axis,
1202+
raise_missing=False)
1203+
return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
1204+
copy=True, allow_dups=True)
11761205

1177-
else:
1178-
self._validate_read_indexer(key, indexer, axis)
1179-
result = self.obj._take(indexer, axis=axis)
1180-
1181-
return result
1182-
1183-
def _validate_read_indexer(self, key, indexer, axis):
1206+
def _validate_read_indexer(self, key, indexer, axis, raise_missing=False):
11841207
"""
11851208
Check that indexer can be used to return a result (e.g. at least one
11861209
element was found, unless the list of keys was actually empty).
@@ -1193,11 +1216,16 @@ def _validate_read_indexer(self, key, indexer, axis):
11931216
Indices corresponding to the key (with -1 indicating not found)
11941217
axis: int
11951218
Dimension on which the indexing is being made
1219+
raise_missing: bool
1220+
Whether to raise a KeyError if some labels are not found. Will be
1221+
removed in the future, and then this method will always behave as
1222+
if raise_missing=True.
11961223
11971224
Raises
11981225
------
11991226
KeyError
1200-
If at least one key was requested none was found.
1227+
If at least one key was requested but none was found, and
1228+
raise_missing=True.
12011229
"""
12021230

12031231
ax = self.obj._get_axis(axis)
@@ -1214,6 +1242,12 @@ def _validate_read_indexer(self, key, indexer, axis):
12141242
u"None of [{key}] are in the [{axis}]".format(
12151243
key=key, axis=self.obj._get_axis_name(axis)))
12161244

1245+
# We (temporarily) allow for some missing keys with .loc, except in
1246+
# some cases (e.g. setting) in which "raise_missing" will be True
1247+
if not(self.name == 'loc' and not raise_missing):
1248+
not_found = list(set(key) - set(ax))
1249+
raise KeyError("{} not in index".format(not_found))
1250+
12171251
# we skip the warning on Categorical/Interval
12181252
# as this check is actually done (check for
12191253
# non-missing values), but a bit later in the
@@ -1229,9 +1263,10 @@ def _validate_read_indexer(self, key, indexer, axis):
12291263

12301264
if not (ax.is_categorical() or ax.is_interval()):
12311265
warnings.warn(_missing_key_warning,
1232-
FutureWarning, stacklevel=5)
1266+
FutureWarning, stacklevel=6)
12331267

1234-
def _convert_to_indexer(self, obj, axis=None, is_setter=False):
1268+
def _convert_to_indexer(self, obj, axis=None, is_setter=False,
1269+
raise_missing=False):
12351270
"""
12361271
Convert indexing key into something we can use to do actual fancy
12371272
indexing on an ndarray
@@ -1310,33 +1345,10 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False):
13101345
inds, = obj.nonzero()
13111346
return inds
13121347
else:
1313-
1314-
# Have the index compute an indexer or return None
1315-
# if it cannot handle
1316-
indexer, objarr = labels._convert_listlike_indexer(
1317-
obj, kind=self.name)
1318-
if indexer is not None:
1319-
return indexer
1320-
1321-
# unique index
1322-
if labels.is_unique:
1323-
indexer = check = labels.get_indexer(objarr)
1324-
1325-
# non-unique (dups)
1326-
else:
1327-
(indexer,
1328-
missing) = labels.get_indexer_non_unique(objarr)
1329-
# 'indexer' has dupes, create 'check' using 'missing'
1330-
check = np.zeros(len(objarr), dtype=np.intp)
1331-
check[missing] = -1
1332-
1333-
mask = check == -1
1334-
if mask.any():
1335-
raise KeyError('{mask} not in index'
1336-
.format(mask=objarr[mask]))
1337-
1338-
return com._values_from_object(indexer)
1339-
1348+
# When setting, missing keys are not allowed, even with .loc:
1349+
kwargs = {'raise_missing': True if is_setter else
1350+
raise_missing}
1351+
return self._get_listlike_indexer(obj, axis, **kwargs)[1]
13401352
else:
13411353
try:
13421354
return labels.get_loc(obj)

0 commit comments

Comments
 (0)