Skip to content

Commit 0fff817

Browse files
authored
Correct check when slicing non-monotonic datetime indexes (#54002)
* Correct check when slicing non-monotonic datetime indexes

  The intention of #37819 was to deprecate (since removed in #49607) the special-case behaviour of non-monotonic datetime indexes, so that a KeyError is raised if either slice bound is not in the index. However, the check only fired correctly when the lower bound was not in the index and the upper bound was either None or _also_ not in the index. Correct the logic here and adapt the one test that exercises this behaviour.

  Closes #53983.

* Modify more tests for updated behaviour

* Added whatsnew entry as bugfix
1 parent 6501fa5 commit 0fff817

File tree

4 files changed

+28
-14
lines changed

4 files changed

+28
-14
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ Categorical
433433
Datetimelike
434434
^^^^^^^^^^^^
435435
- :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`)
436+
- :meth:`DatetimeIndex.slice_indexer` now raises ``KeyError`` for non-monotonic indexes if either of the slice bounds is not in the index; this behaviour was previously deprecated but inconsistently handled. (:issue:`53983`)
436437
- Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`)
437438
- Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`)
438439
- Bug in :func:`to_datetime` converting :class:`Series` or :class:`DataFrame` containing :class:`arrays.ArrowExtensionArray` of ``pyarrow`` timestamps to numpy datetimes (:issue:`52545`)

pandas/core/indexes/datetimes.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -665,18 +665,18 @@ def check_str_or_none(point) -> bool:
665665
return Index.slice_indexer(self, start, end, step)
666666

667667
mask = np.array(True)
668-
raise_mask = np.array(True)
668+
in_index = True
669669
if start is not None:
670670
start_casted = self._maybe_cast_slice_bound(start, "left")
671671
mask = start_casted <= self
672-
raise_mask = start_casted == self
672+
in_index &= (start_casted == self).any()
673673

674674
if end is not None:
675675
end_casted = self._maybe_cast_slice_bound(end, "right")
676676
mask = (self <= end_casted) & mask
677-
raise_mask = (end_casted == self) | raise_mask
677+
in_index &= (end_casted == self).any()
678678

679-
if not raise_mask.any():
679+
if not in_index:
680680
raise KeyError(
681681
"Value based partial slicing on non-monotonic DatetimeIndexes "
682682
"with non-existing keys is not allowed.",

pandas/tests/indexing/test_partial.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -664,5 +664,14 @@ def test_slice_irregular_datetime_index_with_nan(self):
664664
index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
665665
df = DataFrame(range(len(index)), index=index)
666666
expected = DataFrame(range(len(index[:3])), index=index[:3])
667-
result = df["2012-01-01":"2012-01-04"]
667+
with pytest.raises(KeyError, match="non-existing keys is not allowed"):
668+
# Upper bound is not in index (which is unordered)
669+
# GH53983
670+
# GH37819
671+
df["2012-01-01":"2012-01-04"]
672+
# Need this precision for right bound since the right slice
673+
# bound is "rounded" up to the largest timepoint smaller than
674+
# the next "resolution"-step of the provided point.
675+
# e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns
676+
result = df["2012-01-01":"2012-01-03 00:00:00.000000000"]
668677
tm.assert_frame_equal(result, expected)

pandas/tests/series/indexing/test_datetime.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -384,15 +384,19 @@ def compare(slobj):
384384
expected.index = expected.index._with_freq(None)
385385
tm.assert_series_equal(result, expected)
386386

387-
compare(slice("2011-01-01", "2011-01-15"))
388-
with pytest.raises(KeyError, match="Value based partial slicing on non-monotonic"):
389-
compare(slice("2010-12-30", "2011-01-15"))
390-
compare(slice("2011-01-01", "2011-01-16"))
391-
392-
# partial ranges
393-
compare(slice("2011-01-01", "2011-01-6"))
394-
compare(slice("2011-01-06", "2011-01-8"))
395-
compare(slice("2011-01-06", "2011-01-12"))
387+
for key in [
388+
slice("2011-01-01", "2011-01-15"),
389+
slice("2010-12-30", "2011-01-15"),
390+
slice("2011-01-01", "2011-01-16"),
391+
# partial ranges
392+
slice("2011-01-01", "2011-01-6"),
393+
slice("2011-01-06", "2011-01-8"),
394+
slice("2011-01-06", "2011-01-12"),
395+
]:
396+
with pytest.raises(
397+
KeyError, match="Value based partial slicing on non-monotonic"
398+
):
399+
compare(key)
396400

397401
# single values
398402
result = ts2["2011"].sort_index()

0 commit comments

Comments
 (0)