Skip to content

Commit ba86e19

Browse files
authored
BUG: inconsistent validation for get_indexer (pandas-dev#41918)
1 parent 5940c9c commit ba86e19

File tree

4 files changed

+52
-32
lines changed

4 files changed

+52
-32
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,7 @@ Indexing
994994
- Bug in :meth:`DataFrame.__setitem__` raising a ``TypeError`` when using a ``str`` subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`)
995995
- Bug in :meth:`PeriodIndex.get_loc` failing to raise a ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`)
996996
- Bug ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`)
997+
- Bug in :meth:`Index.get_indexer` failing to raise ``ValueError`` in some cases with invalid ``method``, ``limit``, or ``tolerance`` arguments (:issue:`41918`)
997998

998999
Missing
9991000
^^^^^^^

pandas/core/indexes/base.py

+39-23
Original file line numberDiff line numberDiff line change
@@ -3393,7 +3393,7 @@ def get_indexer(
33933393
method = missing.clean_reindex_fill_method(method)
33943394
target = self._maybe_cast_listlike_indexer(target)
33953395

3396-
self._check_indexing_method(method)
3396+
self._check_indexing_method(method, limit, tolerance)
33973397

33983398
if not self._index_as_unique:
33993399
raise InvalidIndexError(self._requires_unique_msg)
@@ -3435,39 +3435,55 @@ def _get_indexer(
34353435
elif method == "nearest":
34363436
indexer = self._get_nearest_indexer(target, limit, tolerance)
34373437
else:
3438-
if tolerance is not None:
3439-
raise ValueError(
3440-
"tolerance argument only valid if doing pad, "
3441-
"backfill or nearest reindexing"
3442-
)
3443-
if limit is not None:
3444-
raise ValueError(
3445-
"limit argument only valid if doing pad, "
3446-
"backfill or nearest reindexing"
3447-
)
3448-
34493438
indexer = self._engine.get_indexer(target._get_engine_target())
34503439

34513440
return ensure_platform_int(indexer)
34523441

34533442
@final
3454-
def _check_indexing_method(self, method: str_t | None) -> None:
3443+
def _check_indexing_method(
3444+
self,
3445+
method: str_t | None,
3446+
limit: int | None = None,
3447+
tolerance=None,
3448+
) -> None:
34553449
"""
34563450
Raise if we have a get_indexer `method` that is not supported or valid.
34573451
"""
3458-
# GH#37871 for now this is only for IntervalIndex and CategoricalIndex
3459-
if not (is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype)):
3460-
return
3452+
if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
3453+
# in practice the clean_reindex_fill_method call would raise
3454+
# before we get here
3455+
raise ValueError("Invalid fill method") # pragma: no cover
34613456

3462-
if method is None:
3463-
return
3457+
if self._is_multi:
3458+
if method == "nearest":
3459+
raise NotImplementedError(
3460+
"method='nearest' not implemented yet "
3461+
"for MultiIndex; see GitHub issue 9365"
3462+
)
3463+
elif method == "pad" or method == "backfill":
3464+
if tolerance is not None:
3465+
raise NotImplementedError(
3466+
"tolerance not implemented yet for MultiIndex"
3467+
)
34643468

3465-
if method in ["bfill", "backfill", "pad", "ffill", "nearest"]:
3466-
raise NotImplementedError(
3467-
f"method {method} not yet implemented for {type(self).__name__}"
3468-
)
3469+
if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
3470+
# GH#37871 for now this is only for IntervalIndex and CategoricalIndex
3471+
if method is not None:
3472+
raise NotImplementedError(
3473+
f"method {method} not yet implemented for {type(self).__name__}"
3474+
)
34693475

3470-
raise ValueError("Invalid fill method")
3476+
if method is None:
3477+
if tolerance is not None:
3478+
raise ValueError(
3479+
"tolerance argument only valid if doing pad, "
3480+
"backfill or nearest reindexing"
3481+
)
3482+
if limit is not None:
3483+
raise ValueError(
3484+
"limit argument only valid if doing pad, "
3485+
"backfill or nearest reindexing"
3486+
)
34713487

34723488
def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
34733489
# override this method on subclasses

pandas/core/indexes/multi.py

-9
Original file line numberDiff line numberDiff line change
@@ -2673,20 +2673,11 @@ def _get_indexer(
26732673
# gets here, and it is checking that we raise with method="nearest"
26742674

26752675
if method == "pad" or method == "backfill":
2676-
if tolerance is not None:
2677-
raise NotImplementedError(
2678-
"tolerance not implemented yet for MultiIndex"
2679-
)
26802676
# TODO: get_indexer_with_fill docstring says values must be _sorted_
26812677
# but that doesn't appear to be enforced
26822678
indexer = self._engine.get_indexer_with_fill(
26832679
target=target._values, values=self._values, method=method, limit=limit
26842680
)
2685-
elif method == "nearest":
2686-
raise NotImplementedError(
2687-
"method='nearest' not implemented yet "
2688-
"for MultiIndex; see GitHub issue 9365"
2689-
)
26902681
else:
26912682
indexer = self._engine.get_indexer(target._values)
26922683

pandas/tests/indexes/multi/test_indexing.py

+12
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,18 @@ def test_get_indexer_crossing_levels(self):
445445
expected = np.array([7, 15], dtype=pad_indexer.dtype)
446446
tm.assert_almost_equal(expected, pad_indexer)
447447

448+
def test_get_indexer_kwarg_validation(self):
449+
# GH#41918
450+
mi = MultiIndex.from_product([range(3), ["A", "B"]])
451+
452+
msg = "limit argument only valid if doing pad, backfill or nearest"
453+
with pytest.raises(ValueError, match=msg):
454+
mi.get_indexer(mi[:-1], limit=4)
455+
456+
msg = "tolerance argument only valid if doing pad, backfill or nearest"
457+
with pytest.raises(ValueError, match=msg):
458+
mi.get_indexer(mi[:-1], tolerance="piano")
459+
448460

449461
def test_getitem(idx):
450462
# scalar

0 commit comments

Comments
 (0)