Skip to content

Commit c6a1638

Browse files
authored
BUG: Series[listlike_of_ints] incorrect on MultiIndex (#33539)
1 parent 17dc6b0 commit c6a1638

File tree

5 files changed

+60
-44
lines changed

5 files changed

+60
-44
lines changed

doc/source/whatsnew/v1.1.0.rst

+30
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,36 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss
325325
...
326326
KeyError: Timestamp('1970-01-01 00:00:00')
327327
328+
.. _whatsnew_110.api_breaking.indexing_int_multiindex_raises_key_errors:
329+
330+
Failed Integer Lookups on MultiIndex Raise KeyError
331+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
332+
Indexing with integers on a :class:`MultiIndex` that has an integer-dtype
333+
first level incorrectly failed to raise ``KeyError`` when one or more of
334+
those integer keys are not present in the first level of the index (:issue:`33539`).
335+
336+
.. ipython:: python
337+
338+
idx = pd.Index(range(4))
339+
dti = pd.date_range("2000-01-03", periods=3)
340+
mi = pd.MultiIndex.from_product([idx, dti])
341+
ser = pd.Series(range(len(mi)), index=mi)
342+
343+
*Previous behavior*:
344+
345+
.. code-block:: ipython
346+
347+
In [5]: ser[[5]]
348+
Out[5]: Series([], dtype: int64)
349+
350+
*New behavior*:
351+
352+
.. code-block:: ipython
353+
354+
In [5]: ser[[5]]
355+
...
356+
KeyError: '[5] not in index'
357+
328358
:meth:`DataFrame.merge` preserves right frame's row order
329359
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
330360
:meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)

pandas/core/indexing.py

-31
Original file line numberDiff line numberDiff line change
@@ -1082,37 +1082,6 @@ def _getitem_axis(self, key, axis: int):
10821082
return self._getbool_axis(key, axis=axis)
10831083
elif is_list_like_indexer(key):
10841084

1085-
# convert various list-like indexers
1086-
# to a list of keys
1087-
# we will use the *values* of the object
1088-
# and NOT the index if its a PandasObject
1089-
if isinstance(labels, ABCMultiIndex):
1090-
1091-
if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1:
1092-
# Series, or 0,1 ndim ndarray
1093-
# GH 14730
1094-
key = list(key)
1095-
elif isinstance(key, ABCDataFrame):
1096-
# GH 15438
1097-
raise NotImplementedError(
1098-
"Indexing a MultiIndex with a "
1099-
"DataFrame key is not "
1100-
"implemented"
1101-
)
1102-
elif hasattr(key, "ndim") and key.ndim > 1:
1103-
raise NotImplementedError(
1104-
"Indexing a MultiIndex with a "
1105-
"multidimensional key is not "
1106-
"implemented"
1107-
)
1108-
1109-
if (
1110-
not isinstance(key, tuple)
1111-
and len(key)
1112-
and not isinstance(key[0], tuple)
1113-
):
1114-
key = tuple([key])
1115-
11161085
# an iterable multi-selection
11171086
if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)):
11181087

pandas/tests/indexing/multiindex/test_loc.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,15 @@ def test_loc_multiindex_missing_label_raises(self):
134134

135135
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
136136
def test_loc_multiindex_list_missing_label(self, key, pos):
137-
# GH 27148 - lists with missing labels do not raise:
137+
# GH 27148 - lists with missing labels _do_ raise
138138
df = DataFrame(
139139
np.random.randn(3, 3),
140140
columns=[[2, 2, 4], [6, 8, 10]],
141141
index=[[4, 4, 8], [8, 10, 12]],
142142
)
143143

144-
expected = df.iloc[pos]
145-
result = df.loc[key]
146-
tm.assert_frame_equal(result, expected)
144+
with pytest.raises(KeyError, match="not in index"):
145+
df.loc[key]
147146

148147
def test_loc_multiindex_too_many_dims_raises(self):
149148
# GH 14885
@@ -295,8 +294,8 @@ def convert_nested_indexer(indexer_type, keys):
295294
[
296295
([], []), # empty ok
297296
(["A"], slice(3)),
298-
(["A", "D"], slice(3)),
299-
(["D", "E"], []), # no values found - fine
297+
(["A", "D"], []), # "D" isn't present -> raise
298+
(["D", "E"], []), # no values found -> raise
300299
(["D"], []), # same, with single item list: GH 27148
301300
(pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
302301
(pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
@@ -310,8 +309,13 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
310309
)
311310
s = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
312311
expected = s.iloc[pos]
313-
result = s.loc[indexer]
314-
tm.assert_series_equal(result, expected)
312+
313+
if expected.size == 0 and indexer != []:
314+
with pytest.raises(KeyError, match=str(indexer)):
315+
s.loc[indexer]
316+
else:
317+
result = s.loc[indexer]
318+
tm.assert_series_equal(result, expected)
315319

316320

317321
def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data):

pandas/tests/indexing/multiindex/test_slice.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,11 @@ def test_per_axis_per_level_getitem(self):
118118
with pytest.raises(ValueError, match=msg):
119119
df.loc[(slice(None), np.array([True, False])), :]
120120

121-
# ambiguous notation
122-
# this is interpreted as slicing on both axes (GH #16396)
123-
result = df.loc[slice(None), [1]]
124-
expected = df.iloc[:, []]
125-
tm.assert_frame_equal(result, expected)
121+
with pytest.raises(KeyError, match=r"\[1\] not in index"):
122+
# slice(None) is on the index, [1] is on the columns, but 1 is
123+
# not in the columns, so we raise
124+
# This used to treat [1] as positional GH#16396
125+
df.loc[slice(None), [1]]
126126

127127
result = df.loc[(slice(None), [1]), :]
128128
expected = df.iloc[[0, 3]]

pandas/tests/series/indexing/test_getitem.py

+13
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,19 @@ def test_getitem_intlist_intervalindex_non_int(self, box):
114114
result = ser[key]
115115
tm.assert_series_equal(result, expected)
116116

117+
@pytest.mark.parametrize("box", [list, np.array, pd.Index])
118+
@pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
119+
def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
120+
# GH#33404 do _not_ fall back to positional since ints are ambiguous
121+
idx = pd.Index(range(4)).astype(dtype)
122+
dti = date_range("2000-01-03", periods=3)
123+
mi = pd.MultiIndex.from_product([idx, dti])
124+
ser = Series(range(len(mi))[::-1], index=mi)
125+
126+
key = box([5])
127+
with pytest.raises(KeyError, match="5"):
128+
ser[key]
129+
117130

118131
def test_getitem_generator(string_series):
119132
gen = (x > 0 for x in string_series)

0 commit comments

Comments
 (0)