Skip to content

Commit fad3303

Browse files
jbrockmendel authored and feefladder committed
REF: implement Index._get_indexer_strict (pandas-dev#42485)
1 parent cc099b7 commit fad3303

File tree

4 files changed

+106
-105
lines changed

4 files changed

+106
-105
lines changed

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3452,7 +3452,7 @@ def __getitem__(self, key):
34523452
else:
34533453
if is_iterator(key):
34543454
key = list(key)
3455-
indexer = self.loc._get_listlike_indexer(key, axis=1)[1]
3455+
indexer = self.columns._get_indexer_strict(key, "columns")[1]
34563456

34573457
# take() does not accept boolean indexers
34583458
if getattr(indexer, "dtype", None) == bool:

pandas/core/indexes/base.py

+83
Original file line numberDiff line numberDiff line change
@@ -5384,6 +5384,89 @@ def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
53845384
indexer, _ = self.get_indexer_non_unique(target)
53855385
return indexer
53865386

5387+
def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
    """
    Analogue to get_indexer that raises if any elements are missing.

    Parameters
    ----------
    key : list-like
        Requested labels. Anything that is not already an Index is
        converted with ``com.asarray_tuplesafe`` before the lookup.
    axis_name : str
        Name of the axis being indexed; forwarded to
        ``_raise_if_missing`` for use in its error message.

    Returns
    -------
    keyarr : Index
        The requested labels after reindexing (or, for datetime-like,
        categorical and interval dtypes, the result of ``self.take``).
    indexer : np.ndarray
        Indexer obtained from ``get_indexer_for`` (unique case) or
        ``_reindex_non_unique`` (non-unique case).

    Raises
    ------
    KeyError
        Propagated from ``_raise_if_missing`` when requested labels
        are not found.
    """
    keyarr = key
    if not isinstance(keyarr, Index):
        keyarr = com.asarray_tuplesafe(keyarr)

    if self._index_as_unique:
        indexer = self.get_indexer_for(keyarr)
        keyarr = self.reindex(keyarr)[0]
    else:
        # The third element returned by _reindex_non_unique is not used here.
        keyarr, indexer, _ = self._reindex_non_unique(keyarr)

    # Validate before take(): past this point the indexer is used as-is.
    self._raise_if_missing(keyarr, indexer, axis_name)

    if (
        needs_i8_conversion(self.dtype)
        or is_categorical_dtype(self.dtype)
        or is_interval_dtype(self.dtype)
    ):
        # For CategoricalIndex take instead of reindex to preserve dtype.
        # For IntervalIndex this is to map integers to the Intervals they match to.
        keyarr = self.take(indexer)
        if keyarr.dtype.kind in ["m", "M"]:
            # DTI/TDI.take can infer a freq in some cases when we don't want one
            if isinstance(key, list) or (
                isinstance(key, type(self))
                # "Index" has no attribute "freq"
                and key.freq is None  # type: ignore[attr-defined]
            ):
                keyarr = keyarr._with_freq(None)

    return keyarr, indexer
5421+
5422+
def _raise_if_missing(self, key, indexer, axis_name: str_t):
    """
    Check that indexer can be used to return a result.

    An empty key is always accepted. Otherwise every requested label
    must have been found, i.e. the indexer must contain no negative
    entries.

    Parameters
    ----------
    key : list-like
        Targeted labels (only used to show correct error message).
    indexer : array-like of ints
        Indices corresponding to the key,
        (with -1 indicating not found).
    axis_name : str
        Name of the axis, shown in the message raised when none of
        the requested labels was found.

    Raises
    ------
    KeyError
        If at least one key was requested and any of the requested
        keys was not found. The message differs depending on whether
        all keys or only some of them are missing.
    """
    if len(key) == 0:
        return

    # Count missing values
    missing_mask = indexer < 0
    nmissing = missing_mask.sum()
    if not nmissing:
        return

    if nmissing == len(indexer):
        # TODO: remove special-case; this is just to keep exception
        # message tests from raising while debugging
        use_interval_msg = is_interval_dtype(self.dtype) or (
            is_categorical_dtype(self.dtype)
            # "Index" has no attribute "categories" [attr-defined]
            and is_interval_dtype(
                self.categories.dtype  # type: ignore[attr-defined]
            )
        )
        if use_interval_msg:
            key = list(key)
        raise KeyError(f"None of [{key}] are in the [{axis_name}]")

    # Only some labels are missing: report each missing label once.
    not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
    raise KeyError(f"{not_found} not in index")
5469+
53875470
@overload
53885471
def _get_indexer_non_comparable(
53895472
self, target: Index, method, unique: Literal[True] = ...

pandas/core/indexes/multi.py

+20-16
Original file line numberDiff line numberDiff line change
@@ -2541,24 +2541,28 @@ def _get_values_for_loc(self, series: Series, loc, key):
25412541
new_ser = series._constructor(new_values, index=new_index, name=series.name)
25422542
return new_ser.__finalize__(series)
25432543

2544-
def _convert_listlike_indexer(self, keyarr) -> np.ndarray | None:
2545-
"""
2546-
Analogous to get_indexer when we are partial-indexing on our first level.
2547-
2548-
Parameters
2549-
----------
2550-
keyarr : Index, np.ndarray, or ExtensionArray
2551-
Indexer to convert.
2544+
def _get_indexer_strict(self, key, axis_name: str) -> tuple[Index, np.ndarray]:
25522545

2553-
Returns
2554-
-------
2555-
np.ndarray[intp] or None
2556-
"""
2557-
indexer = None
2546+
keyarr = key
2547+
if not isinstance(keyarr, Index):
2548+
keyarr = com.asarray_tuplesafe(keyarr)
25582549

2559-
# are we indexing a specific level
25602550
if len(keyarr) and not isinstance(keyarr[0], tuple):
25612551
indexer = self._get_indexer_level_0(keyarr)
2552+
2553+
self._raise_if_missing(key, indexer, axis_name)
2554+
return self[indexer], indexer
2555+
2556+
return super()._get_indexer_strict(key, axis_name)
2557+
2558+
def _raise_if_missing(self, key, indexer, axis_name: str):
2559+
keyarr = key
2560+
if not isinstance(key, Index):
2561+
keyarr = com.asarray_tuplesafe(key)
2562+
2563+
if len(keyarr) and not isinstance(keyarr[0], tuple):
2564+
# i.e. same condition for special case in MultiIndex._get_indexer_strict
2565+
25622566
mask = indexer == -1
25632567
if mask.any():
25642568
check = self.levels[0].get_indexer(keyarr)
@@ -2568,8 +2572,8 @@ def _convert_listlike_indexer(self, keyarr) -> np.ndarray | None:
25682572
# We get here when levels still contain values which are not
25692573
# actually in Index anymore
25702574
raise KeyError(f"{keyarr} not in index")
2571-
2572-
return indexer
2575+
else:
2576+
return super()._raise_if_missing(key, indexer, axis_name)
25732577

25742578
def _get_indexer_level_0(self, target) -> np.ndarray:
25752579
"""

pandas/core/indexing.py

+2-88
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
is_object_dtype,
3131
is_scalar,
3232
is_sequence,
33-
needs_i8_conversion,
3433
)
3534
from pandas.core.dtypes.concat import concat_compat
3635
from pandas.core.dtypes.generic import (
@@ -56,11 +55,8 @@
5655
length_of_indexer,
5756
)
5857
from pandas.core.indexes.api import (
59-
CategoricalIndex,
6058
Index,
61-
IntervalIndex,
6259
MultiIndex,
63-
ensure_index,
6460
)
6561

6662
if TYPE_CHECKING:
@@ -1293,94 +1289,12 @@ def _get_listlike_indexer(self, key, axis: int):
12931289
Indexer for the return object, -1 denotes keys not found.
12941290
"""
12951291
ax = self.obj._get_axis(axis)
1292+
axis_name = self.obj._get_axis_name(axis)
12961293

1297-
keyarr = key
1298-
if not isinstance(keyarr, Index):
1299-
keyarr = com.asarray_tuplesafe(keyarr)
1300-
1301-
if isinstance(ax, MultiIndex):
1302-
# get_indexer expects a MultiIndex or sequence of tuples, but
1303-
# we may be doing partial-indexing, so need an extra check
1304-
1305-
# Have the index compute an indexer or return None
1306-
# if it cannot handle:
1307-
indexer = ax._convert_listlike_indexer(keyarr)
1308-
# We only act on all found values:
1309-
if indexer is not None and (indexer != -1).all():
1310-
# _validate_read_indexer is a no-op if no -1s, so skip
1311-
return ax[indexer], indexer
1312-
1313-
if ax._index_as_unique:
1314-
indexer = ax.get_indexer_for(keyarr)
1315-
keyarr = ax.reindex(keyarr)[0]
1316-
else:
1317-
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1318-
1319-
self._validate_read_indexer(keyarr, indexer, axis)
1320-
1321-
if needs_i8_conversion(ax.dtype) or isinstance(
1322-
ax, (IntervalIndex, CategoricalIndex)
1323-
):
1324-
# For CategoricalIndex take instead of reindex to preserve dtype.
1325-
# For IntervalIndex this is to map integers to the Intervals they match to.
1326-
keyarr = ax.take(indexer)
1327-
if keyarr.dtype.kind in ["m", "M"]:
1328-
# DTI/TDI.take can infer a freq in some cases when we dont want one
1329-
if isinstance(key, list) or (
1330-
isinstance(key, type(ax)) and key.freq is None
1331-
):
1332-
keyarr = keyarr._with_freq(None)
1294+
keyarr, indexer = ax._get_indexer_strict(key, axis_name)
13331295

13341296
return keyarr, indexer
13351297

1336-
def _validate_read_indexer(self, key, indexer, axis: int):
1337-
"""
1338-
Check that indexer can be used to return a result.
1339-
1340-
e.g. at least one element was found,
1341-
unless the list of keys was actually empty.
1342-
1343-
Parameters
1344-
----------
1345-
key : list-like
1346-
Targeted labels (only used to show correct error message).
1347-
indexer: array-like of booleans
1348-
Indices corresponding to the key,
1349-
(with -1 indicating not found).
1350-
axis : int
1351-
Dimension on which the indexing is being made.
1352-
1353-
Raises
1354-
------
1355-
KeyError
1356-
If at least one key was requested but none was found.
1357-
"""
1358-
if len(key) == 0:
1359-
return
1360-
1361-
# Count missing values:
1362-
missing_mask = indexer < 0
1363-
missing = (missing_mask).sum()
1364-
1365-
if missing:
1366-
ax = self.obj._get_axis(axis)
1367-
1368-
# TODO: remove special-case; this is just to keep exception
1369-
# message tests from raising while debugging
1370-
use_interval_msg = isinstance(ax, IntervalIndex) or (
1371-
isinstance(ax, CategoricalIndex)
1372-
and isinstance(ax.categories, IntervalIndex)
1373-
)
1374-
1375-
if missing == len(indexer):
1376-
axis_name = self.obj._get_axis_name(axis)
1377-
if use_interval_msg:
1378-
key = list(key)
1379-
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
1380-
1381-
not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
1382-
raise KeyError(f"{not_found} not in index")
1383-
13841298

13851299
@doc(IndexingMixin.iloc)
13861300
class _iLocIndexer(_LocationIndexer):

0 commit comments

Comments
 (0)