diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 07849702c646d..9a1f8d00225aa 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -323,6 +323,36 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss ... KeyError: Timestamp('1970-01-01 00:00:00') +.. _whatsnew_110.api_breaking.indexing_int_multiindex_raises_key_errors: + +Failed Integer Lookups on MultiIndex Raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Indexing with integers with a :class:`MultiIndex` that has an integer-dtype +first level incorrectly failed to raise ``KeyError`` when one or more of +those integer keys is not present in the first level of the index (:issue:`33539`) + +.. ipython:: python + + idx = pd.Index(range(4)) + dti = pd.date_range("2000-01-03", periods=3) + mi = pd.MultiIndex.from_product([idx, dti]) + ser = pd.Series(range(len(mi)), index=mi) + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: ser[[5]] + Out[5]: Series([], dtype: int64) + +*New behavior*: + +.. code-block:: ipython + + In [5]: ser[[5]] + ...
+ KeyError: '[5] not in index' + :meth:`DataFrame.merge` preserves right frame's row order ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 303365f50c546..5752f00ca5a18 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1082,37 +1082,6 @@ def _getitem_axis(self, key, axis: int): return self._getbool_axis(key, axis=axis) elif is_list_like_indexer(key): - # convert various list-like indexers - # to a list of keys - # we will use the *values* of the object - # and NOT the index if its a PandasObject - if isinstance(labels, ABCMultiIndex): - - if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: - # Series, or 0,1 ndim ndarray - # GH 14730 - key = list(key) - elif isinstance(key, ABCDataFrame): - # GH 15438 - raise NotImplementedError( - "Indexing a MultiIndex with a " - "DataFrame key is not " - "implemented" - ) - elif hasattr(key, "ndim") and key.ndim > 1: - raise NotImplementedError( - "Indexing a MultiIndex with a " - "multidimensional key is not " - "implemented" - ) - - if ( - not isinstance(key, tuple) - and len(key) - and not isinstance(key[0], tuple) - ): - key = tuple([key]) - # an iterable multi-selection if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)): diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index b7802d9b8fe0c..f0cbdbe8d0564 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -134,16 +134,15 @@ def test_loc_multiindex_missing_label_raises(self): @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) def test_loc_multiindex_list_missing_label(self, key, pos): - # GH 27148 - lists with missing labels do not raise: + # GH 27148 - lists with missing labels _do_ raise df = 
DataFrame( np.random.randn(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) - expected = df.iloc[pos] - result = df.loc[key] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="not in index"): + df.loc[key] def test_loc_multiindex_too_many_dims_raises(self): # GH 14885 @@ -295,8 +294,8 @@ def convert_nested_indexer(indexer_type, keys): [ ([], []), # empty ok (["A"], slice(3)), - (["A", "D"], slice(3)), - (["D", "E"], []), # no values found - fine + (["A", "D"], []), # "D" isnt present -> raise + (["D", "E"], []), # no values found -> raise (["D"], []), # same, with single item list: GH 27148 (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)), @@ -310,8 +309,13 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos): ) s = Series(np.arange(9, dtype="int64"), index=idx).sort_index() expected = s.iloc[pos] - result = s.loc[indexer] - tm.assert_series_equal(result, expected) + + if expected.size == 0 and indexer != []: + with pytest.raises(KeyError, match=str(indexer)): + s.loc[indexer] + else: + result = s.loc[indexer] + tm.assert_series_equal(result, expected) def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index f367a92d0b006..532bb4f2e6dac 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -118,11 +118,11 @@ def test_per_axis_per_level_getitem(self): with pytest.raises(ValueError, match=msg): df.loc[(slice(None), np.array([True, False])), :] - # ambiguous notation - # this is interpreted as slicing on both axes (GH #16396) - result = df.loc[slice(None), [1]] - expected = df.iloc[:, []] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match=r"\[1\] not in index"): + # slice(None) is on the index, [1] is on the 
columns, but 1 is + # not in the columns, so we raise + # This used to treat [1] as positional GH#16396 + df.loc[slice(None), [1]] result = df.loc[(slice(None), [1]), :] expected = df.iloc[[0, 3]] diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 9ce31f5f6decf..164c63483f71f 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -114,6 +114,19 @@ def test_getitem_intlist_intervalindex_non_int(self, box): result = ser[key] tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64]) + def test_getitem_intlist_multiindex_numeric_level(self, dtype, box): + # GH#33404 do _not_ fall back to positional since ints are ambiguous + idx = pd.Index(range(4)).astype(dtype) + dti = date_range("2000-01-03", periods=3) + mi = pd.MultiIndex.from_product([idx, dti]) + ser = Series(range(len(mi))[::-1], index=mi) + + key = box([5]) + with pytest.raises(KeyError, match="5"): + ser[key] + def test_getitem_generator(string_series): gen = (x > 0 for x in string_series)