Skip to content

Commit a7a6fd4

Browse files
phofl authored and luckyvs1 committed
BUG: loc returning wrong elements for non-monotonic DatetimeIndex (pandas-dev#38010)
1 parent 3aac68e commit a7a6fd4

File tree

3 files changed

+77
-37
lines changed

3 files changed

+77
-37
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,7 @@ Indexing
243243
^^^^^^^^
244244
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
245245
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
246+
- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`)
246247
- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`)
247248
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
248249
-

pandas/core/indexes/datetimes.py

+37 -35
Original file line number | Diff line number | Diff line change
@@ -776,42 +776,44 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
776776
if isinstance(end, date) and not isinstance(end, datetime):
777777
end = datetime.combine(end, time(0, 0))
778778

779-
try:
779+
def check_str_or_none(point):
780+
return point is not None and not isinstance(point, str)
781+
782+
# GH#33146 if start and end are combinations of str and None and Index is not
783+
# monotonic, we cannot use Index.slice_indexer because it does not honor the
784+
# actual elements; it only searches for start and end
785+
if (
786+
check_str_or_none(start)
787+
or check_str_or_none(end)
788+
or self.is_monotonic_increasing
789+
):
780790
return Index.slice_indexer(self, start, end, step, kind=kind)
781-
except KeyError:
782-
# For historical reasons DatetimeIndex by default supports
783-
# value-based partial (aka string) slices on non-monotonic arrays,
784-
# let's try that.
785-
if (start is None or isinstance(start, str)) and (
786-
end is None or isinstance(end, str)
787-
):
788-
mask = np.array(True)
789-
deprecation_mask = np.array(True)
790-
if start is not None:
791-
start_casted = self._maybe_cast_slice_bound(start, "left", kind)
792-
mask = start_casted <= self
793-
deprecation_mask = start_casted == self
794-
795-
if end is not None:
796-
end_casted = self._maybe_cast_slice_bound(end, "right", kind)
797-
mask = (self <= end_casted) & mask
798-
deprecation_mask = (end_casted == self) | deprecation_mask
799-
800-
if not deprecation_mask.any():
801-
warnings.warn(
802-
"Value based partial slicing on non-monotonic DatetimeIndexes "
803-
"with non-existing keys is deprecated and will raise a "
804-
"KeyError in a future Version.",
805-
FutureWarning,
806-
stacklevel=5,
807-
)
808-
indexer = mask.nonzero()[0][::step]
809-
if len(indexer) == len(self):
810-
return slice(None)
811-
else:
812-
return indexer
813-
else:
814-
raise
791+
792+
mask = np.array(True)
793+
deprecation_mask = np.array(True)
794+
if start is not None:
795+
start_casted = self._maybe_cast_slice_bound(start, "left", kind)
796+
mask = start_casted <= self
797+
deprecation_mask = start_casted == self
798+
799+
if end is not None:
800+
end_casted = self._maybe_cast_slice_bound(end, "right", kind)
801+
mask = (self <= end_casted) & mask
802+
deprecation_mask = (end_casted == self) | deprecation_mask
803+
804+
if not deprecation_mask.any():
805+
warnings.warn(
806+
"Value based partial slicing on non-monotonic DatetimeIndexes "
807+
"with non-existing keys is deprecated and will raise a "
808+
"KeyError in a future Version.",
809+
FutureWarning,
810+
stacklevel=5,
811+
)
812+
indexer = mask.nonzero()[0][::step]
813+
if len(indexer) == len(self):
814+
return slice(None)
815+
else:
816+
return indexer
815817

816818
# --------------------------------------------------------------------
817819

pandas/tests/indexing/test_loc.py

+39 -2
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
Categorical,
1616
CategoricalIndex,
1717
DataFrame,
18+
DatetimeIndex,
1819
Index,
1920
MultiIndex,
2021
Series,
@@ -1556,6 +1557,42 @@ def test_loc_getitem_str_timedeltaindex(self):
15561557
sliced = df.loc["0 days"]
15571558
tm.assert_series_equal(sliced, expected)
15581559

1560+
@pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"])
1561+
def test_loc_getitem_partial_slice_non_monotonicity(
1562+
self, tz_aware_fixture, indexer_end, frame_or_series
1563+
):
1564+
# GH#33146
1565+
obj = frame_or_series(
1566+
[1] * 5,
1567+
index=DatetimeIndex(
1568+
[
1569+
Timestamp("2019-12-30"),
1570+
Timestamp("2020-01-01"),
1571+
Timestamp("2019-12-25"),
1572+
Timestamp("2020-01-02 23:59:59.999999999"),
1573+
Timestamp("2019-12-19"),
1574+
],
1575+
tz=tz_aware_fixture,
1576+
),
1577+
)
1578+
expected = frame_or_series(
1579+
[1] * 2,
1580+
index=DatetimeIndex(
1581+
[
1582+
Timestamp("2020-01-01"),
1583+
Timestamp("2020-01-02 23:59:59.999999999"),
1584+
],
1585+
tz=tz_aware_fixture,
1586+
),
1587+
)
1588+
indexer = slice("2020-01-01", indexer_end)
1589+
1590+
result = obj[indexer]
1591+
tm.assert_equal(result, expected)
1592+
1593+
result = obj.loc[indexer]
1594+
tm.assert_equal(result, expected)
1595+
15591596

15601597
class TestLabelSlicing:
15611598
def test_loc_getitem_label_slice_across_dst(self):
@@ -1652,7 +1689,7 @@ def test_loc_getitem_slice_columns_mixed_dtype(self):
16521689
# GH: 20975
16531690
df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0])
16541691
expected = DataFrame(
1655-
data=[[2, 3]], index=[0], columns=pd.Index([1, 2], dtype=object)
1692+
data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object)
16561693
)
16571694
tm.assert_frame_equal(df.loc[:, 1:], expected)
16581695

@@ -1858,7 +1895,7 @@ def test_loc_set_dataframe_multiindex():
18581895

18591896
def test_loc_mixed_int_float():
18601897
# GH#19456
1861-
ser = Series(range(2), pd.Index([1, 2.0], dtype=object))
1898+
ser = Series(range(2), Index([1, 2.0], dtype=object))
18621899

18631900
result = ser.loc[1]
18641901
assert result == 0

0 commit comments

Comments
 (0)