Skip to content

Commit 1cc030e

Browse files
authored
Bug in loc raising TypeError when a non-integer slice was given for a MultiIndex (pandas-dev#37707)
1 parent b19e47f commit 1cc030e

File tree

5 files changed

+80
-9
lines changed

5 files changed

+80
-9
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ Indexing
620620
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
621621
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)
622622
- Bug in :meth:`DataFrame.loc` not raising ``KeyError`` when a missing combination was given with ``slice(None)`` for the remaining levels (:issue:`19556`)
623+
- Bug in :meth:`DataFrame.loc` raising ``TypeError`` when a non-integer slice was given to select values from a :class:`MultiIndex` (:issue:`25165`, :issue:`24263`)
623624

624625
Missing
625626
^^^^^^^

pandas/core/indexes/multi.py

+22-7
Original file line numberDiff line numberDiff line change
@@ -2764,9 +2764,17 @@ def _partial_tup_index(self, tup, side="left"):
27642764
return start + section.searchsorted(loc, side=side)
27652765

27662766
idx = self._get_loc_single_level_index(lev, lab)
2767-
if k < n - 1:
2767+
if isinstance(idx, slice) and k < n - 1:
2768+
# Get start and end value from slice, necessary when a non-integer
2769+
# interval is given as input GH#37707
2770+
start = idx.start
2771+
end = idx.stop
2772+
elif k < n - 1:
27682773
end = start + section.searchsorted(idx, side="right")
27692774
start = start + section.searchsorted(idx, side="left")
2775+
elif isinstance(idx, slice):
2776+
idx = idx.start
2777+
return start + section.searchsorted(idx, side=side)
27702778
else:
27712779
return start + section.searchsorted(idx, side=side)
27722780

@@ -3102,6 +3110,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
31023110
start = 0
31033111
if key.stop is not None:
31043112
stop = level_index.get_loc(key.stop)
3113+
elif isinstance(start, slice):
3114+
stop = len(level_index)
31053115
else:
31063116
stop = len(level_index) - 1
31073117
step = key.step
@@ -3136,22 +3146,27 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
31363146

31373147
else:
31383148

3139-
code = self._get_loc_single_level_index(level_index, key)
3149+
idx = self._get_loc_single_level_index(level_index, key)
31403150

31413151
if level > 0 or self.lexsort_depth == 0:
31423152
# Desired level is not sorted
3143-
locs = np.array(level_codes == code, dtype=bool, copy=False)
3153+
locs = np.array(level_codes == idx, dtype=bool, copy=False)
31443154
if not locs.any():
31453155
# The label is present in self.levels[level] but unused:
31463156
raise KeyError(key)
31473157
return locs
31483158

3149-
i = level_codes.searchsorted(code, side="left")
3150-
j = level_codes.searchsorted(code, side="right")
3151-
if i == j:
3159+
if isinstance(idx, slice):
3160+
start = idx.start
3161+
end = idx.stop
3162+
else:
3163+
start = level_codes.searchsorted(idx, side="left")
3164+
end = level_codes.searchsorted(idx, side="right")
3165+
3166+
if start == end:
31523167
# The label is present in self.levels[level] but unused:
31533168
raise KeyError(key)
3154-
return slice(i, j)
3169+
return slice(start, end)
31553170

31563171
def get_locs(self, seq):
31573172
"""

pandas/tests/indexes/multi/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ def test_timestamp_multiindex_indexer():
768768
[
769769
pd.date_range(
770770
start="2019-01-02T00:15:33",
771-
end="2019-01-05T02:15:33",
771+
end="2019-01-05T03:15:33",
772772
freq="H",
773773
name="date",
774774
),

pandas/tests/indexing/multiindex/test_loc.py

+9
Original file line numberDiff line numberDiff line change
@@ -659,3 +659,12 @@ def test_getitem_non_found_tuple():
659659
)
660660
with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
661661
df.loc[(2.0, 2.0, 3.0)]
662+
663+
664+
def test_get_loc_datetime_index():
665+
# GH#24263
666+
index = pd.date_range("2001-01-01", periods=100)
667+
mi = MultiIndex.from_arrays([index])
668+
# Check if get_loc matches for Index and MultiIndex
669+
assert mi.get_loc("2001-01") == slice(0, 31, None)
670+
assert index.get_loc("2001-01") == slice(0, 31, None)

pandas/tests/indexing/multiindex/test_partial.py

+47-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
import numpy as np
22
import pytest
33

4-
from pandas import DataFrame, Float64Index, Int64Index, MultiIndex
4+
from pandas import (
5+
DataFrame,
6+
Float64Index,
7+
Int64Index,
8+
MultiIndex,
9+
date_range,
10+
to_datetime,
11+
)
512
import pandas._testing as tm
613

714

@@ -208,6 +215,45 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
208215
expected.loc["bar"] = 0
209216
tm.assert_series_equal(result, expected)
210217

218+
@pytest.mark.parametrize(
219+
"indexer, exp_idx, exp_values",
220+
[
221+
(slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]),
222+
(
223+
slice(None, "2019-2"),
224+
date_range("2019", periods=2, freq="MS"),
225+
[0, 1, 2, 3],
226+
),
227+
],
228+
)
229+
def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
230+
# GH: 25165
231+
date_idx = date_range("2019", periods=2, freq="MS")
232+
df = DataFrame(
233+
list(range(4)),
234+
index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
235+
)
236+
expected = DataFrame(
237+
exp_values,
238+
index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
239+
)
240+
result = df[indexer]
241+
tm.assert_frame_equal(result, expected)
242+
result = df.loc[indexer]
243+
tm.assert_frame_equal(result, expected)
244+
245+
result = df.loc(axis=0)[indexer]
246+
tm.assert_frame_equal(result, expected)
247+
248+
result = df.loc[indexer, :]
249+
tm.assert_frame_equal(result, expected)
250+
251+
df2 = df.swaplevel(0, 1).sort_index()
252+
expected = expected.swaplevel(0, 1).sort_index()
253+
254+
result = df2.loc[:, indexer, :]
255+
tm.assert_frame_equal(result, expected)
256+
211257

212258
def test_loc_getitem_partial_both_axis():
213259
# gh-12660

0 commit comments

Comments
 (0)