Correct check when slicing non-monotonic datetime indexes (#54002)

wence- · web-flow · commit 0fff817aca15 · 2023-07-19T08:41:34.000-07:00
* Correct check when slicing non-monotonic datetime indexes The intention of #37819 was to deprecate (removed in #49607) the special case behaviour of non-monotonic datetime indexes, so that if either slice bound is not in the index, a KeyError is raised. However, the check only fired correctly for the case where the lower bound was not in the index and either the upper bound was None or it was _also_ not in the index. Correct the logic here and adapt the one test that exercises this behaviour. Closes #53983. * Modify more tests for updated behaviour * Added whatsnew entry as bugfix
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -433,6 +433,7 @@ Categorical
 Datetimelike
 ^^^^^^^^^^^^
 - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`)
+- :meth:`DatetimeIndex.slice_indexer` now raises ``KeyError`` for non-monotonic indexes if either of the slice bounds is not in the index, this behaviour was previously deprecated but inconsistently handled. (:issue:`53983`)
 - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`)
 - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`)
 - Bug in :func:`to_datetime` converting :class:`Series` or :class:`DataFrame` containing :class:`arrays.ArrowExtensionArray` of ``pyarrow`` timestamps to numpy datetimes (:issue:`52545`)
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -665,18 +665,18 @@ def check_str_or_none(point) -> bool:
             return Index.slice_indexer(self, start, end, step)
 
         mask = np.array(True)
-        raise_mask = np.array(True)
+        in_index = True
         if start is not None:
             start_casted = self._maybe_cast_slice_bound(start, "left")
             mask = start_casted <= self
-            raise_mask = start_casted == self
+            in_index &= (start_casted == self).any()
 
         if end is not None:
             end_casted = self._maybe_cast_slice_bound(end, "right")
             mask = (self <= end_casted) & mask
-            raise_mask = (end_casted == self) | raise_mask
+            in_index &= (end_casted == self).any()
 
-        if not raise_mask.any():
+        if not in_index:
             raise KeyError(
                 "Value based partial slicing on non-monotonic DatetimeIndexes "
                 "with non-existing keys is not allowed.",
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
@@ -664,5 +664,14 @@ def test_slice_irregular_datetime_index_with_nan(self):
         index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
         df = DataFrame(range(len(index)), index=index)
         expected = DataFrame(range(len(index[:3])), index=index[:3])
-        result = df["2012-01-01":"2012-01-04"]
+        with pytest.raises(KeyError, match="non-existing keys is not allowed"):
+            # Upper bound is not in index (which is unordered)
+            # GH53983
+            # GH37819
+            df["2012-01-01":"2012-01-04"]
+        # Need this precision for right bound since the right slice
+        # bound is "rounded" up to the largest timepoint smaller than
+        # the next "resolution"-step of the provided point.
+        # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns
+        result = df["2012-01-01":"2012-01-03 00:00:00.000000000"]
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
@@ -384,15 +384,19 @@ def compare(slobj):
         expected.index = expected.index._with_freq(None)
         tm.assert_series_equal(result, expected)
 
-    compare(slice("2011-01-01", "2011-01-15"))
-    with pytest.raises(KeyError, match="Value based partial slicing on non-monotonic"):
-        compare(slice("2010-12-30", "2011-01-15"))
-    compare(slice("2011-01-01", "2011-01-16"))
-
-    # partial ranges
-    compare(slice("2011-01-01", "2011-01-6"))
-    compare(slice("2011-01-06", "2011-01-8"))
-    compare(slice("2011-01-06", "2011-01-12"))
+    for key in [
+        slice("2011-01-01", "2011-01-15"),
+        slice("2010-12-30", "2011-01-15"),
+        slice("2011-01-01", "2011-01-16"),
+        # partial ranges
+        slice("2011-01-01", "2011-01-6"),
+        slice("2011-01-06", "2011-01-8"),
+        slice("2011-01-06", "2011-01-12"),
+    ]:
+        with pytest.raises(
+            KeyError, match="Value based partial slicing on non-monotonic"
+        ):
+            compare(key)
 
     # single values
     result = ts2["2011"].sort_index()