From edfb289e2d9ec147fbe7e1172ed06c5366ab5adb Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 23:35:41 +0100 Subject: [PATCH 1/5] [BUG]: Slicing non monotonic DatetimeIndex did not raise for non-existing keys --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/datetimes.py | 3 ++- .../indexes/datetimes/test_partial_slicing.py | 14 +++++++++----- pandas/tests/indexing/test_loc.py | 9 +++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..6806e33084530 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -470,6 +470,7 @@ Indexing - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) +- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` did not raise ``KeyError`` when non-existing label was sliced in unordered ` Missing ^^^^^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9744eb0ecbb88..288d49128b87a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -810,7 +810,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): if end is not None: end_casted = self._maybe_cast_slice_bound(end, "right", kind) mask = (self <= end_casted) & mask - + if not any(mask): + raise indexer = mask.nonzero()[0][::step] if len(indexer) == len(self): return slice(None) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 57dc46e1fb415..cf0d6afc98a47 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -315,15 +315,19 @@ def test_partial_slice_doesnt_require_monotonicity(self): s = Series(np.arange(10), date_range("2014-01-01", periods=10)) nonmonotonic = s[[3, 5, 4]] - expected = nonmonotonic.iloc[:0] timestamp = Timestamp("2014-01-10") + msg = r"Timestamp\('2014-01-10 00:00:00'\)" - tm.assert_series_equal(nonmonotonic["2014-01-10":], expected) - with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + with pytest.raises(KeyError, match=msg): + nonmonotonic["2014-01-10":] + + with pytest.raises(KeyError, match=msg): nonmonotonic[timestamp:] - tm.assert_series_equal(nonmonotonic.loc["2014-01-10":], expected) - with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + with pytest.raises(KeyError, match=msg): + nonmonotonic.loc["2014-01-10":] + + with pytest.raises(KeyError, match=msg): nonmonotonic.loc[timestamp:] def test_loc_datetime_length_one(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0d40b5f38e48a..f2764e4cd74a0 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1563,6 +1563,15 @@ def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice): expected = ser.iloc[expected_slice] tm.assert_series_equal(result, expected) + def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series): + # GH#18531 + obj = frame_or_series( + [1, 2, 3], + index=[pd.Timestamp("2017"), pd.Timestamp("2019"), pd.Timestamp("2018")], + ) + with pytest.raises(KeyError, match=r"Timestamp\('2020-01-01 00:00:00'\)"): + obj.loc["2020":"2022"] + class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): From 68af482731c4fb7330c467cabcc37b214e9e52b2 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 23:37:11 +0100 Subject: [PATCH 2/5] Add whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/indexing/test_loc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6806e33084530..a75984426ed9c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -470,7 +470,7 @@ Indexing - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) -- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` did not raise ``KeyError`` when non-existing label was sliced in unordered ` +- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` did not raise ``KeyError`` when non-existing label was sliced in unordered :class:`DatetimeIndex` (:issue:`18531`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f2764e4cd74a0..cecaac43d5a05 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1567,7 +1567,7 @@ def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series): # GH#18531 obj = frame_or_series( [1, 2, 3], - index=[pd.Timestamp("2017"), pd.Timestamp("2019"), pd.Timestamp("2018")], + index=[Timestamp("2017"), Timestamp("2019"), Timestamp("2018")], ) with pytest.raises(KeyError, match=r"Timestamp\('2020-01-01 00:00:00'\)"): obj.loc["2020":"2022"] From c051d641fa5754f28f35ed8ea732dc6de4d33ca3 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 14:43:03 +0100 Subject: [PATCH 3/5] Rename object --- pandas/tests/indexes/datetimes/test_partial_slicing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index cf0d6afc98a47..65fb8e84a67a3 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -311,10 +311,9 @@ def test_partial_slicing_with_multiindex(self): tm.assert_frame_equal(result, expected) def test_partial_slice_doesnt_require_monotonicity(self): - # For historical reasons. - s = Series(np.arange(10), date_range("2014-01-01", periods=10)) + ser = Series(np.arange(10), date_range("2014-01-01", periods=10)) - nonmonotonic = s[[3, 5, 4]] + nonmonotonic = ser[[3, 5, 4]] timestamp = Timestamp("2014-01-10") msg = r"Timestamp\('2014-01-10 00:00:00'\)" From 069dcd9bc480755f4d677c462e68b643de9d2751 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 14:47:58 +0100 Subject: [PATCH 4/5] Add string test --- pandas/tests/indexing/test_loc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 435bde219c81a..b405731765fb9 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1573,6 +1573,10 @@ def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series): with pytest.raises(KeyError, match=r"Timestamp\('2020-01-01 00:00:00'\)"): obj.loc["2020":"2022"] + obj.index = ["a", "c", "b"] + with pytest.raises(KeyError, match=r"d"): + obj.loc["d":"e"] + class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): From f24851419fbb9db93cbecac80a39b2cbc5a36d48 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 16:09:40 +0100 Subject: [PATCH 5/5] Add new testcase --- pandas/tests/indexing/test_loc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b405731765fb9..060e13d0ce8ea 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1568,11 +1568,15 @@ def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series): # GH#18531 obj = frame_or_series( [1, 2, 3], - index=[Timestamp("2017"), Timestamp("2019"), Timestamp("2018")], + index=[Timestamp("2016"), Timestamp("2019"), Timestamp("2017")], ) with pytest.raises(KeyError, match=r"Timestamp\('2020-01-01 00:00:00'\)"): obj.loc["2020":"2022"] + result = obj.loc["2018":"2022"] + expected = frame_or_series([2], index=[Timestamp("2019")]) + tm.assert_equal(result, expected) + obj.index = ["a", "c", "b"] with pytest.raises(KeyError, match=r"d"): obj.loc["d":"e"]