From cf20270a43476f311881aa89a009f90a92509bb7 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 23 Nov 2020 00:02:56 +0100 Subject: [PATCH 01/14] BUG: loc returning wrong elements for non-monotonic DatetimeIndex --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/datetimes.py | 82 +++++++++++-------- .../indexes/datetimes/test_partial_slicing.py | 32 ++++++++ 3 files changed, 80 insertions(+), 35 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 04db52c5bfa13..19dec863ea486 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -613,6 +613,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) +- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` returning wrong elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1dd3eb1017eca..c5a52a430f471 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -775,42 +775,54 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): if isinstance(end, date) and not isinstance(end, datetime): end = datetime.combine(end, time(0, 0)) - try: - return Index.slice_indexer(self, start, end, step, kind=kind) - except KeyError: - # For historical reasons DatetimeIndex by default supports - # value-based partial (aka string) slices on non-monotonic arrays, - # let's try that. 
- if (start is None or isinstance(start, str)) and ( - end is None or isinstance(end, str) - ): - mask = np.array(True) - deprecation_mask = np.array(True) - if start is not None: - start_casted = self._maybe_cast_slice_bound(start, "left", kind) - mask = start_casted <= self - deprecation_mask = start_casted == self - - if end is not None: - end_casted = self._maybe_cast_slice_bound(end, "right", kind) - mask = (self <= end_casted) & mask - deprecation_mask = (end_casted == self) | deprecation_mask - - if not deprecation_mask.any(): - warnings.warn( - "Value based partial slicing on non-monotonic DatetimeIndexes " - "with non-existing keys is deprecated and will raise a " - "KeyError in a future Version.", - FutureWarning, - stacklevel=5, - ) - indexer = mask.nonzero()[0][::step] - if len(indexer) == len(self): - return slice(None) + # GH#33146 if start and end are combinations of str and None and Index is not + # monotonic, we can not use Index.slice_indexer because it does not honor the + # actual elements, is only searching for start and end + if not ( + ( + (start is None and isinstance(end, str)) + or (end is None and isinstance(start, str)) + or (isinstance(start, str) and isinstance(end, str)) + ) + and not self.is_monotonic_increasing + ): + try: + return Index.slice_indexer(self, start, end, step, kind=kind) + except KeyError: + # For historical reasons DatetimeIndex by default supports + # value-based partial (aka string) slices on non-monotonic arrays, + # let's try that. 
+ if (start is None or isinstance(start, str)) and ( + end is None or isinstance(end, str) + ): + pass else: - return indexer - else: - raise + raise + mask = np.array(True) + deprecation_mask = np.array(True) + if start is not None: + start_casted = self._maybe_cast_slice_bound(start, "left", kind) + mask = start_casted <= self + deprecation_mask = start_casted == self + + if end is not None: + end_casted = self._maybe_cast_slice_bound(end, "right", kind) + mask = (self <= end_casted) & mask + deprecation_mask = (end_casted == self) | deprecation_mask + + if not deprecation_mask.any(): + warnings.warn( + "Value based partial slicing on non-monotonic DatetimeIndexes " + "with non-existing keys is deprecated and will raise a " + "KeyError in a future Version.", + FutureWarning, + stacklevel=5, + ) + indexer = mask.nonzero()[0][::step] + if len(indexer) == len(self): + return slice(None) + else: + return indexer # -------------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 05ee67eee0da5..c4a986ea7bd0b 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -331,6 +331,38 @@ def test_partial_slice_doesnt_require_monotonicity(self): with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): nonmonotonic.loc[timestamp:] + @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) + def test_loc_getitem_partial_slice_non_monotonicity(self, indexer_end): + # GH#33146 + df = DataFrame( + {"a": [1] * 5}, + index=Index( + [ + Timestamp("2019-12-30"), + Timestamp("2020-01-01"), + Timestamp("2019-12-25"), + Timestamp("2020-01-02 23:59:59.999999999"), + Timestamp("2019-12-19"), + ] + ), + ) + expected = DataFrame( + {"a": [1] * 2}, + index=Index( + [ + Timestamp("2020-01-01"), + Timestamp("2020-01-02 23:59:59.999999999"), + ] + 
), + ) + indexer = slice("2020-01-01", indexer_end) + + result = df[indexer] + tm.assert_frame_equal(result, expected) + + result = df.loc[indexer] + tm.assert_frame_equal(result, expected) + def test_loc_datetime_length_one(self): # GH16071 df = DataFrame( From e8ac2d2349deab57e8416e4e72b4818c41ae7338 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 30 Nov 2020 23:17:20 +0100 Subject: [PATCH 02/14] Improve whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 19dec863ea486..f55c4f6a73e76 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -613,7 +613,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) -- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` returning wrong elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) +- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) Missing ^^^^^^^ From b7b004798a67607f753499fe4b2b85c9976922de Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 30 Nov 2020 23:18:47 +0100 Subject: [PATCH 03/14] Add series --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f55c4f6a73e76..c0bdfbf1bba85 100644 --- a/doc/source/whatsnew/v1.2.0.rst 
+++ b/doc/source/whatsnew/v1.2.0.rst @@ -613,7 +613,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) -- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) +- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) Missing ^^^^^^^ From e8aed442136418587b16ff504ccb4bec47cf5043 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 30 Nov 2020 23:20:43 +0100 Subject: [PATCH 04/14] Parametrize test --- .../indexes/datetimes/test_partial_slicing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index c4a986ea7bd0b..5949a87d12889 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -332,10 +332,10 @@ def test_partial_slice_doesnt_require_monotonicity(self): nonmonotonic.loc[timestamp:] @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) - def test_loc_getitem_partial_slice_non_monotonicity(self, indexer_end): + def test_loc_getitem_partial_slice_non_monotonicity(self, indexer_end, frame_or_series): # GH#33146 - df = DataFrame( - {"a": [1] * 5}, + df = frame_or_series( + [1] * 5, 
index=Index( [ Timestamp("2019-12-30"), @@ -346,8 +346,8 @@ def test_loc_getitem_partial_slice_non_monotonicity(self, indexer_end): ] ), ) - expected = DataFrame( - {"a": [1] * 2}, + expected = frame_or_series( + [1] * 2, index=Index( [ Timestamp("2020-01-01"), @@ -358,10 +358,10 @@ def test_loc_getitem_partial_slice_non_monotonicity(self, indexer_end): indexer = slice("2020-01-01", indexer_end) result = df[indexer] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) result = df.loc[indexer] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) def test_loc_datetime_length_one(self): # GH16071 From 3db47199afa983987c39026901c68e30f393e2f2 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 30 Nov 2020 23:36:23 +0100 Subject: [PATCH 05/14] Simplify code --- pandas/core/indexes/datetimes.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c5a52a430f471..6e21c01a241c2 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -775,29 +775,14 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): if isinstance(end, date) and not isinstance(end, datetime): end = datetime.combine(end, time(0, 0)) + def check_str_or_none(point): + return point is not None and not isinstance(point, str) # GH#33146 if start and end are combinations of str and None and Index is not # monotonic, we can not use Index.slice_indexer because it does not honor the # actual elements, is only searching for start and end - if not ( - ( - (start is None and isinstance(end, str)) - or (end is None and isinstance(start, str)) - or (isinstance(start, str) and isinstance(end, str)) - ) - and not self.is_monotonic_increasing - ): - try: - return Index.slice_indexer(self, start, end, step, kind=kind) - except KeyError: - # For historical reasons DatetimeIndex by default supports - # value-based 
partial (aka string) slices on non-monotonic arrays, - # let's try that. - if (start is None or isinstance(start, str)) and ( - end is None or isinstance(end, str) - ): - pass - else: - raise + if check_str_or_none(start) or check_str_or_none(end) or self.is_monotonic_increasing: + return Index.slice_indexer(self, start, end, step, kind=kind) + mask = np.array(True) deprecation_mask = np.array(True) if start is not None: From 24386fb0e2753b790a8518d764ae39018ed33f8b Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 30 Nov 2020 23:39:05 +0100 Subject: [PATCH 06/14] Run black --- pandas/core/indexes/datetimes.py | 7 ++++++- pandas/tests/indexes/datetimes/test_partial_slicing.py | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 41552e08b56c4..3e8ddabf3da60 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -782,10 +782,15 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): def check_str_or_none(point): return point is not None and not isinstance(point, str) + # GH#33146 if start and end are combinations of str and None and Index is not # monotonic, we can not use Index.slice_indexer because it does not honor the # actual elements, is only searching for start and end - if check_str_or_none(start) or check_str_or_none(end) or self.is_monotonic_increasing: + if ( + check_str_or_none(start) + or check_str_or_none(end) + or self.is_monotonic_increasing + ): return Index.slice_indexer(self, start, end, step, kind=kind) mask = np.array(True) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 5949a87d12889..259d2a9071548 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -332,7 +332,9 @@ def test_partial_slice_doesnt_require_monotonicity(self): 
nonmonotonic.loc[timestamp:] @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) - def test_loc_getitem_partial_slice_non_monotonicity(self, indexer_end, frame_or_series): + def test_loc_getitem_partial_slice_non_monotonicity( + self, indexer_end, frame_or_series + ): # GH#33146 df = frame_or_series( [1] * 5, From 79314393ab1fac224fa2fa02887c75cc8d9f1c9f Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 1 Dec 2020 20:32:08 +0100 Subject: [PATCH 07/14] Move test --- .../indexes/datetimes/test_partial_slicing.py | 34 ------------------- pandas/tests/indexing/test_loc.py | 34 +++++++++++++++++++ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 259d2a9071548..05ee67eee0da5 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -331,40 +331,6 @@ def test_partial_slice_doesnt_require_monotonicity(self): with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): nonmonotonic.loc[timestamp:] - @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) - def test_loc_getitem_partial_slice_non_monotonicity( - self, indexer_end, frame_or_series - ): - # GH#33146 - df = frame_or_series( - [1] * 5, - index=Index( - [ - Timestamp("2019-12-30"), - Timestamp("2020-01-01"), - Timestamp("2019-12-25"), - Timestamp("2020-01-02 23:59:59.999999999"), - Timestamp("2019-12-19"), - ] - ), - ) - expected = frame_or_series( - [1] * 2, - index=Index( - [ - Timestamp("2020-01-01"), - Timestamp("2020-01-02 23:59:59.999999999"), - ] - ), - ) - indexer = slice("2020-01-01", indexer_end) - - result = df[indexer] - tm.assert_equal(result, expected) - - result = df.loc[indexer] - tm.assert_equal(result, expected) - def test_loc_datetime_length_one(self): # GH16071 df = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py 
b/pandas/tests/indexing/test_loc.py index cf6c2878acd9a..05e74686f5f5d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1552,6 +1552,40 @@ def test_loc_getitem_str_timedeltaindex(self): sliced = df.loc["0 days"] tm.assert_series_equal(sliced, expected) + @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) + def test_loc_getitem_partial_slice_non_monotonicity( + self, indexer_end, frame_or_series + ): + # GH#33146 + df = frame_or_series( + [1] * 5, + index=Index( + [ + Timestamp("2019-12-30"), + Timestamp("2020-01-01"), + Timestamp("2019-12-25"), + Timestamp("2020-01-02 23:59:59.999999999"), + Timestamp("2019-12-19"), + ] + ), + ) + expected = frame_or_series( + [1] * 2, + index=Index( + [ + Timestamp("2020-01-01"), + Timestamp("2020-01-02 23:59:59.999999999"), + ] + ), + ) + indexer = slice("2020-01-01", indexer_end) + + result = df[indexer] + tm.assert_equal(result, expected) + + result = df.loc[indexer] + tm.assert_equal(result, expected) + class TestLabelSlicing: def test_loc_getitem_label_slice_across_dst(self): From b61dab93c1978c5f7bc0b84b6155abbb632454a3 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 1 Dec 2020 21:44:30 +0100 Subject: [PATCH 08/14] Add pd --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 05e74686f5f5d..f902e12b49220 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1559,7 +1559,7 @@ def test_loc_getitem_partial_slice_non_monotonicity( # GH#33146 df = frame_or_series( [1] * 5, - index=Index( + index=pd.Index( [ Timestamp("2019-12-30"), Timestamp("2020-01-01"), From 8ee5b61380fdf64fa40c8659b4ac18c11ad70281 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 1 Dec 2020 22:29:36 +0100 Subject: [PATCH 09/14] Add missing pd --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f902e12b49220..7e95926071d49 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1571,7 +1571,7 @@ def test_loc_getitem_partial_slice_non_monotonicity( ) expected = frame_or_series( [1] * 2, - index=Index( + index=pd.Index( [ Timestamp("2020-01-01"), Timestamp("2020-01-02 23:59:59.999999999"), From 406b7dcf17b9769d3cc82fc5bdbd779e0f827ebd Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 9 Dec 2020 21:52:01 +0100 Subject: [PATCH 10/14] Address review --- pandas/tests/indexing/test_loc.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7e95926071d49..00269a94b290c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1557,9 +1557,9 @@ def test_loc_getitem_partial_slice_non_monotonicity( self, indexer_end, frame_or_series ): # GH#33146 - df = frame_or_series( + obj = frame_or_series( [1] * 5, - index=pd.Index( + index=Index( [ Timestamp("2019-12-30"), Timestamp("2020-01-01"), @@ -1571,7 +1571,7 @@ def test_loc_getitem_partial_slice_non_monotonicity( ) expected = frame_or_series( [1] * 2, - index=pd.Index( + index=Index( [ Timestamp("2020-01-01"), Timestamp("2020-01-02 23:59:59.999999999"), @@ -1580,10 +1580,10 @@ def test_loc_getitem_partial_slice_non_monotonicity( ) indexer = slice("2020-01-01", indexer_end) - result = df[indexer] + result = obj[indexer] tm.assert_equal(result, expected) - result = df.loc[indexer] + result = obj.loc[indexer] tm.assert_equal(result, expected) @@ -1878,7 +1878,7 @@ def test_loc_set_dataframe_multiindex(): def test_loc_mixed_int_float(): # GH#19456 - ser = Series(range(2), pd.Index([1, 2.0], dtype=object)) + ser = Series(range(2), Index([1, 2.0], dtype=object)) result = ser.loc[1] assert result == 0 From f990e5378f95ab10a50e393577ce2b4cc27e4077 Mon Sep 17 00:00:00 2001
From: phofl Date: Wed, 9 Dec 2020 21:53:22 +0100 Subject: [PATCH 11/14] Move whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 - doc/source/whatsnew/v1.3.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index fb1ab69832f39..af9219bc25931 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -686,7 +686,6 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) - Bug on inserting a boolean label into a :class:`DataFrame` with a numeric :class:`Index` columns incorrectly casting to integer (:issue:`36319`) - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) -- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`) - Bug in :meth:`MultiIndex.drop` does not raise if labels are partially found (:issue:`37820`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) - Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 90f611c55e710..14c5cf42ea78e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -128,7 +128,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` 
for string slices (:issue:`33146`) - Missing From 65f2f88c8a3814c527a9b240ac2c6b6371c63b64 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 19 Dec 2020 00:04:38 +0100 Subject: [PATCH 12/14] Add tz fixture --- pandas/tests/indexing/test_loc.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 5eb8b2e801699..edc5bfeea8dd1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -24,7 +24,7 @@ date_range, timedelta_range, to_datetime, - to_timedelta, + to_timedelta, DatetimeIndex, ) import pandas._testing as tm from pandas.api.types import is_scalar @@ -1554,28 +1554,30 @@ def test_loc_getitem_str_timedeltaindex(self): @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"]) def test_loc_getitem_partial_slice_non_monotonicity( - self, indexer_end, frame_or_series + self, tz_aware_fixture, indexer_end, frame_or_series ): # GH#33146 obj = frame_or_series( [1] * 5, - index=Index( + index=DatetimeIndex( [ Timestamp("2019-12-30"), Timestamp("2020-01-01"), Timestamp("2019-12-25"), Timestamp("2020-01-02 23:59:59.999999999"), Timestamp("2019-12-19"), - ] + ], + tz=tz_aware_fixture, ), ) expected = frame_or_series( [1] * 2, - index=Index( + index=DatetimeIndex( [ Timestamp("2020-01-01"), Timestamp("2020-01-02 23:59:59.999999999"), - ] + ], + tz=tz_aware_fixture ), ) indexer = slice("2020-01-01", indexer_end) From d194e48bade469747b9b51e01eb8c17829e993cb Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 19 Dec 2020 00:12:27 +0100 Subject: [PATCH 13/14] Move import --- pandas/tests/indexing/test_loc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index edc5bfeea8dd1..925f67d793cbf 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -15,6 +15,7 @@ Categorical, CategoricalIndex, DataFrame, + 
DatetimeIndex, Index, MultiIndex, Series, @@ -24,7 +25,7 @@ date_range, timedelta_range, to_datetime, - to_timedelta, DatetimeIndex, + to_timedelta, ) import pandas._testing as tm from pandas.api.types import is_scalar @@ -1577,7 +1578,7 @@ def test_loc_getitem_partial_slice_non_monotonicity( Timestamp("2020-01-01"), Timestamp("2020-01-02 23:59:59.999999999"), ], - tz=tz_aware_fixture + tz=tz_aware_fixture, ), ) indexer = slice("2020-01-01", indexer_end) From 2b6beb32f97d5849bf890a45c5b140a2a2d7eb74 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 3 Jan 2021 22:50:40 +0100 Subject: [PATCH 14/14] Remove pd for Index after merge --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fa0199b9d2a00..7c73917e44b22 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1689,7 +1689,7 @@ def test_loc_getitem_slice_columns_mixed_dtype(self): # GH: 20975 df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0]) expected = DataFrame( - data=[[2, 3]], index=[0], columns=pd.Index([1, 2], dtype=object) + data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object) ) tm.assert_frame_equal(df.loc[:, 1:], expected)