Skip to content

Commit fd2146f

Browse files
toobaz authored and jreback committed
BUG: fix KeyError with list of a single, missing, element (#27154)
closes #27148
1 parent a359a99 commit fd2146f

File tree

4 files changed

+32
-38
lines changed

4 files changed

+32
-38
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,7 @@ Indexing
10461046
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
10471047
- Fixed bug where assigning an :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise an error (:issue:`26390`)
10481048
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
1049+
- Fixed a ``KeyError`` when indexing a :class:`MultiIndex` level with a list containing exactly one label, and that label is missing (:issue:`27148`)
10491050
- Bug which produced ``AttributeError`` on partial matching :class:`Timestamp` in a :class:`MultiIndex` (:issue:`26944`)
10501051
- Bug in :class:`Categorical` and :class:`CategoricalIndex` with :class:`Interval` values when using the ``in`` operator (``__contains__``) with objects that are not comparable to the values in the ``Interval`` (:issue:`23705`)
10511052
- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` on a :class:`DataFrame` with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a :class:`Series` (:issue:`27110`)

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1861,7 +1861,7 @@ def _getitem_axis(self, key, axis=None):
18611861

18621862
if (
18631863
not isinstance(key, tuple)
1864-
and len(key) > 1
1864+
and len(key)
18651865
and not isinstance(key[0], tuple)
18661866
):
18671867
key = tuple([key])

pandas/tests/indexing/multiindex/test_loc.py

+25-32
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,19 @@ def test_loc_multiindex_missing_label_raises(self):
130130
with pytest.raises(KeyError, match=r"^2$"):
131131
df.loc[2]
132132

133+
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
134+
def test_loc_multiindex_list_missing_label(self, key, pos):
135+
# GH 27148 - lists with missing labels do not raise:
136+
df = DataFrame(
137+
np.random.randn(3, 3),
138+
columns=[[2, 2, 4], [6, 8, 10]],
139+
index=[[4, 4, 8], [8, 10, 12]],
140+
)
141+
142+
expected = df.iloc[pos]
143+
result = df.loc[key]
144+
tm.assert_frame_equal(result, expected)
145+
133146
def test_loc_multiindex_too_many_dims_raises(self):
134147
# GH 14885
135148
s = Series(
@@ -280,47 +293,27 @@ def convert_nested_indexer(indexer_type, keys):
280293

281294

282295
@pytest.mark.parametrize(
283-
"indexer, is_level1, expected_error",
296+
"indexer, pos",
284297
[
285-
([], False, None), # empty ok
286-
(["A"], False, None),
287-
(["A", "D"], False, None),
288-
(["D"], False, r"\['D'\] not in index"), # not any values found
289-
(pd.IndexSlice[:, ["foo"]], True, None),
290-
(pd.IndexSlice[:, ["foo", "bah"]], True, None),
298+
([], []), # empty ok
299+
(["A"], slice(3)),
300+
(["A", "D"], slice(3)),
301+
(["D", "E"], []), # no values found - fine
302+
(["D"], []), # same, with single item list: GH 27148
303+
(pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
304+
(pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
291305
],
292306
)
293-
def test_loc_getitem_duplicates_multiindex_missing_indexers(
294-
indexer, is_level1, expected_error
295-
):
307+
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
296308
# GH 7866
297309
# multi-index slicing with missing indexers
298310
idx = MultiIndex.from_product(
299311
[["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
300312
)
301313
s = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
302-
303-
if indexer == []:
304-
expected = s.iloc[[]]
305-
elif is_level1:
306-
expected = Series(
307-
[0, 3, 6],
308-
index=MultiIndex.from_product(
309-
[["A", "B", "C"], ["foo"]], names=["one", "two"]
310-
),
311-
).sort_index()
312-
else:
313-
exp_idx = MultiIndex.from_product(
314-
[["A"], ["foo", "bar", "baz"]], names=["one", "two"]
315-
)
316-
expected = Series(np.arange(3, dtype="int64"), index=exp_idx).sort_index()
317-
318-
if expected_error is not None:
319-
with pytest.raises(KeyError, match=expected_error):
320-
s.loc[indexer]
321-
else:
322-
result = s.loc[indexer]
323-
tm.assert_series_equal(result, expected)
314+
expected = s.iloc[pos]
315+
result = s.loc[indexer]
316+
tm.assert_series_equal(result, expected)
324317

325318

326319
def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data):

pandas/tests/indexing/multiindex/test_slice.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,11 @@ def test_per_axis_per_level_getitem(self):
117117
with pytest.raises(ValueError):
118118
df.loc[(slice(None), np.array([True, False])), :]
119119

120-
# ambiguous cases
121-
# these can be multiply interpreted (e.g. in this case
122-
# as df.loc[slice(None),[1]] as well
123-
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
124-
df.loc[slice(None), [1]]
120+
# ambiguous notation
121+
# this is interpreted as slicing on both axes (GH #16396)
122+
result = df.loc[slice(None), [1]]
123+
expected = df.iloc[:, []]
124+
tm.assert_frame_equal(result, expected)
125125

126126
result = df.loc[(slice(None), [1]), :]
127127
expected = df.iloc[[0, 3]]

0 commit comments

Comments
 (0)