diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c7ab5c979db9e..1b671aae16f14 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -992,6 +992,7 @@ Indexing - Bug in :meth:`DataFrame.__setitem__` raising a ``TypeError`` when using a ``str`` subclass as the column name with a :class:`DatetimeIndex` (:issue:`37366`) - Bug in :meth:`PeriodIndex.get_loc` failing to raise a ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`) - Bug ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`) +- Bug in :meth:`Index.get_indexer` failing to raise ``ValueError`` in some cases with invalid ``method``, ``limit``, or ``tolerance`` arguments (:issue:`41918`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fcf576efb73ab..a24d8513c3709 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3393,7 +3393,7 @@ def get_indexer( method = missing.clean_reindex_fill_method(method) target = self._maybe_cast_listlike_indexer(target) - self._check_indexing_method(method) + self._check_indexing_method(method, limit, tolerance) if not self._index_as_unique: raise InvalidIndexError(self._requires_unique_msg) @@ -3435,39 +3435,55 @@ def _get_indexer( elif method == "nearest": indexer = self._get_nearest_indexer(target, limit, tolerance) else: - if tolerance is not None: - raise ValueError( - "tolerance argument only valid if doing pad, " - "backfill or nearest reindexing" - ) - if limit is not None: - raise ValueError( - "limit argument only valid if doing pad, " - "backfill or nearest reindexing" - ) - indexer = self._engine.get_indexer(target._get_engine_target()) return ensure_platform_int(indexer) @final - def _check_indexing_method(self, method: str_t | None) -> None: + def _check_indexing_method( + self, + method: str_t | None, + limit: int | None = None, + tolerance=None, + ) -> None: """ Raise if we have a get_indexer `method` that is not supported or valid. """ - # GH#37871 for now this is only for IntervalIndex and CategoricalIndex - if not (is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype)): - return + if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]: + # in practice the clean_reindex_fill_method call would raise + # before we get here + raise ValueError("Invalid fill method") # pragma: no cover - if method is None: - return + if self._is_multi: + if method == "nearest": + raise NotImplementedError( + "method='nearest' not implemented yet " + "for MultiIndex; see GitHub issue 9365" + ) + elif method == "pad" or method == "backfill": + if tolerance is not None: + raise NotImplementedError( + "tolerance not implemented yet for MultiIndex" + ) - if method in ["bfill", "backfill", "pad", "ffill", "nearest"]: - raise NotImplementedError( - f"method {method} not yet implemented for {type(self).__name__}" - ) + if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype): + # GH#37871 for now this is only for IntervalIndex and CategoricalIndex + if method is not None: + raise NotImplementedError( + f"method {method} not yet implemented for {type(self).__name__}" + ) - raise ValueError("Invalid fill method") + if method is None: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + if limit is not None: + raise ValueError( + "limit argument only valid if doing pad, " + "backfill or nearest reindexing" + ) def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: # override this method on subclasses diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0876847aed84f..821d696200175 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2673,20 +2673,11 @@ def _get_indexer( # gets here, and it is checking that we raise with method="nearest" if method == "pad" or method == "backfill": - if tolerance is not None: - raise NotImplementedError( - "tolerance not implemented yet for MultiIndex" - ) # TODO: get_indexer_with_fill docstring says values must be _sorted_ # but that doesn't appear to be enforced indexer = self._engine.get_indexer_with_fill( target=target._values, values=self._values, method=method, limit=limit ) - elif method == "nearest": - raise NotImplementedError( - "method='nearest' not implemented yet " - "for MultiIndex; see GitHub issue 9365" - ) else: indexer = self._engine.get_indexer(target._values) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index fba94960ddaad..9e1097ce5951f 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -445,6 +445,18 @@ def test_get_indexer_crossing_levels(self): expected = np.array([7, 15], dtype=pad_indexer.dtype) tm.assert_almost_equal(expected, pad_indexer) + def test_get_indexer_kwarg_validation(self): + # GH#41918 + mi = MultiIndex.from_product([range(3), ["A", "B"]]) + + msg = "limit argument only valid if doing pad, backfill or nearest" + with pytest.raises(ValueError, match=msg): + mi.get_indexer(mi[:-1], limit=4) + + msg = "tolerance argument only valid if doing pad, backfill or nearest" + with pytest.raises(ValueError, match=msg): + mi.get_indexer(mi[:-1], tolerance="piano") + def test_getitem(idx): # scalar