From e211d74e859a0cec2c2f20c9502889deed5be945 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 5 Jul 2023 11:06:18 +0100 Subject: [PATCH 1/3] Correct check when slicing non-monotonic datetime indexes The intention of #37819 was to deprecate (removed in #49607) the special case behaviour of non-monotonic datetime indexes, so that if either slice bound is not in the index, a KeyError is raised. However, the check only fired correctly for the case where the lower bound was not in the index and either the upper bound was None or it was _also_ not in the index. Correct the logic here and adapt the one test that exercises this behaviour. Closes #53983. --- pandas/core/indexes/datetimes.py | 8 ++++---- pandas/tests/indexing/test_partial.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1500bcef5d4d9..95a6e4a9a26ae 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -666,18 +666,18 @@ def check_str_or_none(point) -> bool: return Index.slice_indexer(self, start, end, step) mask = np.array(True) - raise_mask = np.array(True) + in_index = True if start is not None: start_casted = self._maybe_cast_slice_bound(start, "left") mask = start_casted <= self - raise_mask = start_casted == self + in_index &= (start_casted == self).any() if end is not None: end_casted = self._maybe_cast_slice_bound(end, "right") mask = (self <= end_casted) & mask - raise_mask = (end_casted == self) | raise_mask + in_index &= (end_casted == self).any() - if not raise_mask.any(): + if not in_index: raise KeyError( "Value based partial slicing on non-monotonic DatetimeIndexes " "with non-existing keys is not allowed.", diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 5d1d4ba6f638a..bc6e8aed449f3 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -664,5 +664,14 @@ def test_slice_irregular_datetime_index_with_nan(self): index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None]) df = DataFrame(range(len(index)), index=index) expected = DataFrame(range(len(index[:3])), index=index[:3]) - result = df["2012-01-01":"2012-01-04"] + with pytest.raises(KeyError, match="non-existing keys is not allowed"): + # Upper bound is not in index (which is unordered) + # GH53983 + # GH37819 + df["2012-01-01":"2012-01-04"] + # Need this precision for right bound since the right slice + # bound is "rounded" up to the largest timepoint smaller than + # the next "resolution"-step of the provided point. + # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns + result = df["2012-01-01":"2012-01-03 00:00:00.000000000"] tm.assert_frame_equal(result, expected) From 6edff75126ba1500052b569e03a4567b98c8b8c7 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 5 Jul 2023 12:21:54 +0100 Subject: [PATCH 2/3] Modify more tests for updated behaviour --- pandas/tests/series/indexing/test_datetime.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index f47e344336a8b..072607c29fd4c 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -384,15 +384,19 @@ def compare(slobj): expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected) - compare(slice("2011-01-01", "2011-01-15")) - with pytest.raises(KeyError, match="Value based partial slicing on non-monotonic"): - compare(slice("2010-12-30", "2011-01-15")) - compare(slice("2011-01-01", "2011-01-16")) - - # partial ranges - compare(slice("2011-01-01", "2011-01-6")) - compare(slice("2011-01-06", "2011-01-8")) - compare(slice("2011-01-06", "2011-01-12")) + for key in [ + slice("2011-01-01", "2011-01-15"), + slice("2010-12-30", "2011-01-15"), + slice("2011-01-01", "2011-01-16"), + # partial ranges + slice("2011-01-01", "2011-01-6"), + slice("2011-01-06", "2011-01-8"), + slice("2011-01-06", "2011-01-12"), + ]: + with pytest.raises( + KeyError, match="Value based partial slicing on non-monotonic" + ): + compare(key) # single values result = ts2["2011"].sort_index() From e5815512d07a2dab78a51acf6e534fe706885e4f Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 19 Jul 2023 11:59:43 +0100 Subject: [PATCH 3/3] Added whatsnew entry as bugfix --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6390fbeed8548..8644caef21de2 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -368,6 +368,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) +- :meth:`DatetimeIndex.slice_indexer` now raises ``KeyError`` for non-monotonic indexes if either of the slice bounds is not in the index, this behaviour was previously deprecated but inconsistently handled. (:issue:`53983`) - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :meth:`DataFrame.to_sql` raising ``ValueError`` for pyarrow-backed date like dtypes (:issue:`53854`)