Skip to content

Commit 0793721

Browse files
phofl authored and JulianWgs committed
Bug in loc not raising KeyError with MultiIndex containing no longer used levels (pandas-dev#41358)
1 parent 2596cab commit 0793721

File tree

6 files changed

+41
-1
lines changed

6 files changed

+41
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ Indexing
788788
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
789789
- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
790790
- Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`)
791+
- Bug in :meth:`DataFrame.loc` not raising ``KeyError`` when the key was not found in a :class:`MultiIndex` whose levels contain values that are no longer used (:issue:`41170`)
791792
- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)
792793
- Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`)
793794
- Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`)
@@ -808,6 +809,7 @@ MultiIndex
808809
- Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`)
809810
- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when the :class:`MultiIndex` objects contain ``NaN``, even when they are differently ordered (:issue:`38439`)
810811
- Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
812+
- Bug in :meth:`MultiIndex.reindex` raising ``ValueError`` with an empty :class:`MultiIndex` when indexing only a specific level (:issue:`41170`)
811813

812814
I/O
813815
^^^

pandas/core/indexes/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4224,7 +4224,8 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray:
42244224

42254225
else: # tie out the order with other
42264226
if level == 0: # outer most level, take the fast route
4227-
ngroups = 1 + new_lev_codes.max()
4227+
max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
4228+
ngroups = 1 + max_new_lev
42284229
left_indexer, counts = libalgos.groupsort_indexer(
42294230
new_lev_codes, ngroups
42304231
)

pandas/core/indexes/multi.py

+5
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
from pandas.core.arrays import Categorical
7373
from pandas.core.arrays.categorical import factorize_from_iterables
7474
import pandas.core.common as com
75+
from pandas.core.indexers import is_empty_indexer
7576
import pandas.core.indexes.base as ibase
7677
from pandas.core.indexes.base import (
7778
Index,
@@ -2634,6 +2635,10 @@ def _convert_listlike_indexer(self, keyarr):
26342635
mask = check == -1
26352636
if mask.any():
26362637
raise KeyError(f"{keyarr[mask]} not in index")
2638+
elif is_empty_indexer(indexer, keyarr):
2639+
# We get here when levels still contain values which are not
2640+
# actually in Index anymore
2641+
raise KeyError(f"{keyarr} not in index")
26372642

26382643
return indexer, keyarr
26392644

pandas/tests/indexes/multi/test_reindex.py

+11
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,14 @@ def test_reindex_non_unique():
104104
msg = "cannot handle a non-unique multi-index!"
105105
with pytest.raises(ValueError, match=msg):
106106
a.reindex(new_idx)
107+
108+
109+
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
110+
def test_reindex_empty_with_level(values):
111+
# GH41170
112+
idx = MultiIndex.from_arrays(values)
113+
result, result_indexer = idx.reindex(np.array(["b"]), level=0)
114+
expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
115+
expected_indexer = np.array([], dtype=result_indexer.dtype)
116+
tm.assert_index_equal(result, expected)
117+
tm.assert_numpy_array_equal(result_indexer, expected_indexer)

pandas/tests/indexing/test_loc.py

+7
Original file line numberDiff line numberDiff line change
@@ -1624,6 +1624,13 @@ def test_loc_getitem_preserves_index_level_category_dtype(self):
16241624
result = df.loc[["a"]].index.levels[0]
16251625
tm.assert_index_equal(result, expected)
16261626

1627+
@pytest.mark.parametrize("lt_value", [30, 10])
1628+
def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value):
1629+
# GH#41170
1630+
df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]])
1631+
with pytest.raises(KeyError, match=r"\['b'\] not in index"):
1632+
df.loc[df["a"] < lt_value, :].loc[["b"], :]
1633+
16271634

16281635
class TestLocSetitemWithExpansion:
16291636
@pytest.mark.slow

pandas/tests/series/methods/test_reindex.py

+14
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pandas import (
55
Categorical,
66
Index,
7+
MultiIndex,
78
NaT,
89
Period,
910
PeriodIndex,
@@ -345,3 +346,16 @@ def test_reindex_periodindex_with_object(p_values, o_values, values, expected_va
345346
result = ser.reindex(object_index)
346347
expected = Series(expected_values, index=object_index)
347348
tm.assert_series_equal(result, expected)
349+
350+
351+
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
352+
def test_reindex_empty_with_level(values):
353+
# GH41170
354+
ser = Series(
355+
range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object"
356+
)
357+
result = ser.reindex(np.array(["b"]), level=0)
358+
expected = Series(
359+
index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object"
360+
)
361+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)