Skip to content

Commit 5d0faa8

Browse files
authored
BUG: Series.__getitem__ with MultiIndex and leading integer level (#33404)
1 parent fe42954 commit 5d0faa8

File tree

6 files changed

+53
-21
lines changed

6 files changed

+53
-21
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,7 @@ Indexing
450450
- Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`)
451451
- Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`)
452452
- Bug in :meth:`DataFrame.copy` where ``_item_cache`` was not invalidated after the copy, causing post-copy value updates not to be reflected (:issue:`31784`)
453+
- Bug in :meth:`Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`)
453454

454455
Missing
455456
^^^^^^^

pandas/core/indexes/base.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -4568,10 +4568,7 @@ def get_value(self, series: "Series", key):
45684568
-------
45694569
scalar or Series
45704570
"""
4571-
if not is_scalar(key):
4572-
# if key is not a scalar, directly raise an error (the code below
4573-
# would convert to numpy arrays and raise later any way) - GH29926
4574-
raise InvalidIndexError(key)
4571+
self._check_indexing_error(key)
45754572

45764573
try:
45774574
# GH 20882, 21257
@@ -4592,6 +4589,12 @@ def get_value(self, series: "Series", key):
45924589

45934590
return self._get_values_for_loc(series, loc, key)
45944591

4592+
def _check_indexing_error(self, key):
4593+
if not is_scalar(key):
4594+
# if key is not a scalar, directly raise an error (the code below
4595+
# would convert to numpy arrays and raise later anyway) - GH29926
4596+
raise InvalidIndexError(key)
4597+
45954598
def _should_fallback_to_positional(self) -> bool:
45964599
"""
45974600
If an integer key is not found, should we fall back to positional indexing?

pandas/core/indexes/multi.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -2333,23 +2333,21 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
23332333
# --------------------------------------------------------------------
23342334
# Indexing Methods
23352335

2336-
def get_value(self, series, key):
2337-
# Label-based
2336+
def _check_indexing_error(self, key):
23382337
if not is_hashable(key) or is_iterator(key):
23392338
# We allow tuples if they are hashable, whereas other Index
23402339
# subclasses require scalar.
23412340
# We have to explicitly exclude generators, as these are hashable.
23422341
raise InvalidIndexError(key)
23432342

2344-
try:
2345-
loc = self.get_loc(key)
2346-
except KeyError:
2347-
if is_integer(key):
2348-
loc = key
2349-
else:
2350-
raise
2351-
2352-
return self._get_values_for_loc(series, loc, key)
2343+
def _should_fallback_to_positional(self) -> bool:
2344+
"""
2345+
If an integer key is not found, should we fall back to positional indexing?
2346+
"""
2347+
if not self.nlevels:
2348+
return False
2349+
# GH#33355
2350+
return self.levels[0]._should_fallback_to_positional()
23532351

23542352
def _get_values_for_loc(self, series: "Series", loc, key):
23552353
"""

pandas/tests/indexing/multiindex/test_getitem.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def test_series_getitem_returns_scalar(
8787
(lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
8888
(lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
8989
(lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
90-
(lambda s: s.__getitem__(len(s)), IndexError, "is out of bounds"),
91-
(lambda s: s[len(s)], IndexError, "is out of bounds"),
90+
(lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s)
91+
(lambda s: s[len(s)], KeyError, ""), # match should include len(s)
9292
(
9393
lambda s: s.iloc[len(s)],
9494
IndexError,

pandas/tests/indexing/multiindex/test_partial.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
from pandas import DataFrame, MultiIndex
4+
from pandas import DataFrame, Float64Index, Int64Index, MultiIndex
55
import pandas._testing as tm
66

77

@@ -126,7 +126,32 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
126126

127127
# this works...for now
128128
df["A"].iloc[14] = 5
129-
assert df["A"][14] == 5
129+
assert df["A"].iloc[14] == 5
130+
131+
@pytest.mark.parametrize("dtype", [int, float])
132+
def test_getitem_intkey_leading_level(
133+
self, multiindex_year_month_day_dataframe_random_data, dtype
134+
):
135+
# GH#33355 don't fall back to positional when leading level is int
136+
ymd = multiindex_year_month_day_dataframe_random_data
137+
levels = ymd.index.levels
138+
ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
139+
ser = ymd["A"]
140+
mi = ser.index
141+
assert isinstance(mi, MultiIndex)
142+
if dtype is int:
143+
assert isinstance(mi.levels[0], Int64Index)
144+
else:
145+
assert isinstance(mi.levels[0], Float64Index)
146+
147+
assert 14 not in mi.levels[0]
148+
assert not mi.levels[0]._should_fallback_to_positional()
149+
assert not mi._should_fallback_to_positional()
150+
151+
with pytest.raises(KeyError, match="14"):
152+
ser[14]
153+
with pytest.raises(KeyError, match="14"):
154+
mi.get_value(ser, 14)
130155

131156
# ---------------------------------------------------------------------
132157
# AMBIGUOUS CASES!
@@ -140,7 +165,7 @@ def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_da
140165
tm.assert_series_equal(result, expected)
141166

142167
# need to put in some work here
143-
168+
# FIXME: don't leave commented-out code
144169
# self.ymd.loc[2000, 0] = 0
145170
# assert (self.ymd.loc[2000]['A'] == 0).all()
146171

pandas/tests/indexing/multiindex/test_setitem.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ def f(name, df2):
236236
f_index
237237
)
238238

239+
# FIXME: don't leave commented-out code
239240
# TODO(wesm): unused?
240241
# new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
241242

@@ -255,7 +256,11 @@ def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data):
255256
assert notna(s.values[65:]).all()
256257

257258
s[2000, 3, 10] = np.nan
258-
assert isna(s[49])
259+
assert isna(s.iloc[49])
260+
261+
with pytest.raises(KeyError, match="49"):
262+
# GH#33355 don't fall back to positional when leading level is int
263+
s[49]
259264

260265
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
261266
frame = multiindex_dataframe_random_data

0 commit comments

Comments
 (0)