pandas-dev · jreback · May 7, 2021 · May 7, 2021 · May 7, 2021 · jreback
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3412,7 +3412,7 @@ def __getitem__(self, key):
         else:
             if is_iterator(key):
                 key = list(key)
-            indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
+            indexer = self.loc._get_listlike_indexer(key, axis=1)[1]
 
         # take() does not accept boolean indexers
         if getattr(indexer, "dtype", None) == bool:

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -11,8 +11,6 @@
 
 import numpy as np
 
-from pandas._config.config import option_context
-
 from pandas._libs.indexing import NDFrameIndexerBase
 from pandas._libs.lib import item_from_zerodim
 from pandas.errors import (
@@ -1089,7 +1087,7 @@ def _getitem_iterable(self, key, axis: int):
         self._validate_key(key, axis)
 
         # A collection of keys
-        keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
+        keyarr, indexer = self._get_listlike_indexer(key, axis)
         return self.obj._reindex_with_indexers(
             {axis: [keyarr, indexer]}, copy=True, allow_dups=True
         )
@@ -1255,8 +1253,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
                 (inds,) = key.nonzero()
                 return inds
             else:
-                # When setting, missing keys are not allowed, even with .loc:
-                return self._get_listlike_indexer(key, axis, raise_missing=True)[1]
+                return self._get_listlike_indexer(key, axis)[1]
         else:
             try:
                 return labels.get_loc(key)
@@ -1266,7 +1263,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
                     return {"key": key}
                 raise
 
-    def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
+    def _get_listlike_indexer(self, key, axis: int):
         """
         Transform a list-like of keys into a new index and an indexer.
 
@@ -1276,16 +1273,11 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
             Targeted labels.
         axis:  int
             Dimension on which the indexing is being made.
-        raise_missing: bool, default False
-            Whether to raise a KeyError if some labels were not found.
-            Will be removed in the future, and then this method will always behave as
-            if ``raise_missing=True``.
 
         Raises
         ------
         KeyError
-            If at least one key was requested but none was found, and
-            raise_missing=True.
+            If any of the requested keys is not found in the axis.
 
         Returns
         -------
@@ -1310,12 +1302,10 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
         else:
             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
 
-        self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
+        self._validate_read_indexer(keyarr, indexer, axis)
         return keyarr, indexer
 
-    def _validate_read_indexer(
-        self, key, indexer, axis: int, raise_missing: bool = False
-    ):
+    def _validate_read_indexer(self, key, indexer, axis: int):
         """
         Check that indexer can be used to return a result.
 
@@ -1331,16 +1321,11 @@ def _validate_read_indexer(
             (with -1 indicating not found).
         axis : int
             Dimension on which the indexing is being made.
-        raise_missing: bool
-            Whether to raise a KeyError if some labels are not found. Will be
-            removed in the future, and then this method will always behave as
-            if raise_missing=True.
 
         Raises
         ------
         KeyError
-            If at least one key was requested but none was found, and
-            raise_missing=True.
+            If any of the requested keys is not found in the axis.
         """
         if len(key) == 0:
             return
@@ -1356,21 +1341,8 @@ def _validate_read_indexer(
 
             ax = self.obj._get_axis(axis)
 
-            # We (temporarily) allow for some missing keys with .loc, except in
-            # some cases (e.g. setting) in which "raise_missing" will be False
-            if raise_missing:
-                not_found = list(set(key) - set(ax))
-                raise KeyError(f"{not_found} not in index")
-
-            not_found = key[missing_mask]
-
-            with option_context("display.max_seq_items", 10, "display.width", 80):
-                raise KeyError(
-                    "Passing list-likes to .loc or [] with any missing labels "
-                    "is no longer supported. "
-                    f"The following labels were missing: {not_found}. "
-                    "See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"  # noqa:E501
-                )
+            not_found = list(set(key) - set(ax))
+            raise KeyError(f"{not_found} not in index")
 
 
 @doc(IndexingMixin.iloc)

diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
@@ -296,12 +296,7 @@ def test_loc_getitem_listlike_labels(self):
     def test_loc_getitem_listlike_unused_category(self):
         # GH#37901 a label that is in index.categories but not in index
         # listlike containing an element in the categories but not in the values
-        msg = (
-            "The following labels were missing: CategoricalIndex(['e'], "
-            "categories=['c', 'a', 'b', 'e'], ordered=False, name='B', "
-            "dtype='category')"
-        )
-        with pytest.raises(KeyError, match=re.escape(msg)):
+        with pytest.raises(KeyError, match=re.escape("['e'] not in index")):
             self.df2.loc[["a", "b", "e"]]
 
     def test_loc_getitem_label_unused_category(self):
@@ -311,10 +306,7 @@ def test_loc_getitem_label_unused_category(self):
 
     def test_loc_getitem_non_category(self):
         # not all labels in the categories
-        msg = (
-            "The following labels were missing: Index(['d'], dtype='object', name='B')"
-        )
-        with pytest.raises(KeyError, match=re.escape(msg)):
+        with pytest.raises(KeyError, match=re.escape("['d'] not in index")):
             self.df2.loc[["a", "d"]]
 
     def test_loc_setitem_expansion_label_unused_category(self):
@@ -346,8 +338,7 @@ def test_loc_listlike_dtypes(self):
         exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index)
         tm.assert_frame_equal(res, exp, check_index_type=True)
 
-        msg = "The following labels were missing: Index(['x'], dtype='object')"
-        with pytest.raises(KeyError, match=re.escape(msg)):
+        with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
             df.loc[["a", "x"]]
 
     def test_loc_listlike_dtypes_duplicated_categories_and_codes(self):
@@ -370,8 +361,7 @@ def test_loc_listlike_dtypes_duplicated_categories_and_codes(self):
         )
         tm.assert_frame_equal(res, exp, check_index_type=True)
 
-        msg = "The following labels were missing: Index(['x'], dtype='object')"
-        with pytest.raises(KeyError, match=re.escape(msg)):
+        with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
             df.loc[["a", "x"]]
 
     def test_loc_listlike_dtypes_unused_category(self):
@@ -394,11 +384,10 @@ def test_loc_listlike_dtypes_unused_category(self):
         )
         tm.assert_frame_equal(res, exp, check_index_type=True)
 
-        msg = "The following labels were missing: Index(['x'], dtype='object')"
-        with pytest.raises(KeyError, match=re.escape(msg)):
+        with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
             df.loc[["a", "x"]]
 
-    def test_loc_getitem_listlike_unused_category_raises_keyerro(self):
+    def test_loc_getitem_listlike_unused_category_raises_keyerror(self):
         # key that is an *unused* category raises
         index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
         df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)
@@ -407,13 +396,7 @@ def test_loc_getitem_listlike_unused_category_raises_keyerro(self):
             # For comparison, check the scalar behavior
             df.loc["e"]
 
-        msg = (
-            "Passing list-likes to .loc or [] with any missing labels is no "
-            "longer supported. The following labels were missing: "
-            "CategoricalIndex(['e'], categories=['a', 'b', 'c', 'd', 'e'], "
-            "ordered=False, dtype='category'). See https"
-        )
-        with pytest.raises(KeyError, match=re.escape(msg)):
+        with pytest.raises(KeyError, match=re.escape("['e'] not in index")):
             df.loc[["a", "e"]]
 
     def test_ix_categorical_index(self):

diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
@@ -535,10 +535,10 @@ def test_floating_misc(self, indexer_sl):
         result2 = s.iloc[[0, 2, 4]]
         tm.assert_series_equal(result1, result2)
 
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             indexer_sl(s)[[1.6, 5, 10]]
 
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             indexer_sl(s)[[0, 1, 2]]
 
         result = indexer_sl(s)[[2.5, 5]]

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -800,7 +800,7 @@ def test_iloc_non_unique_indexing(self):
         df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
         df2 = concat([df2, 2 * df2, 3 * df2])
 
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             df2.loc[idx]
 
     def test_iloc_empty_list_indexer_is_ok(self):

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -247,12 +247,12 @@ def test_dups_fancy_indexing_not_in_order(self):
         tm.assert_frame_equal(result, expected)
 
         rows = ["C", "B", "E"]
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             df.loc[rows]
 
         # see GH5553, make sure we use the right indexer
         rows = ["F", "G", "H", "C", "B", "E"]
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             df.loc[rows]
 
     def test_dups_fancy_indexing_only_missing_label(self):
@@ -274,22 +274,22 @@ def test_dups_fancy_indexing_missing_label(self, vals):
 
         # GH 4619; duplicate indexer with missing label
         df = DataFrame({"A": vals})
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             df.loc[[0, 8, 0]]
 
     def test_dups_fancy_indexing_non_unique(self):
 
         # non unique with non unique selector
         df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             df.loc[["A", "A", "E"]]
 
     def test_dups_fancy_indexing2(self):
         # GH 5835
         # dups on index and missing values
         df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])
 
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             df.loc[:, ["A", "B", "C"]]
 
     def test_dups_fancy_indexing3(self):

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -293,11 +293,11 @@ def test_getitem_label_list_with_missing(self):
         s = Series(range(3), index=["a", "b", "c"])
 
         # consistency
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             s[["a", "d"]]
 
         s = Series(range(3))
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             s[[0, 3]]
 
     @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
@@ -349,7 +349,7 @@ def test_loc_to_fail(self):
             s.loc[["4"]]
 
         s.loc[-1] = 3
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             s.loc[[-1, -2]]
 
         s["a"] = 2
@@ -396,7 +396,7 @@ def test_loc_getitem_list_with_fail(self):
             s.loc[[3]]
 
         # a non-match and a match
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             s.loc[[2, 3]]
 
     def test_loc_index(self):
@@ -2249,12 +2249,7 @@ def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box):
         ser2 = ser[:-1]
         ci2 = ci[1:]
         # but if there are no NAs present, this should raise KeyError
-        msg = (
-            r"Passing list-likes to .loc or \[\] with any missing labels is no "
-            "longer supported. The following labels were missing: "
-            r"(Categorical)?Index\(\[nan\], .*\). "
-            "See https"
-        )
+        msg = "not in index"
         with pytest.raises(KeyError, match=msg):
             ser2.loc[box(ci2)]
 
@@ -2264,41 +2259,13 @@ def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box):
         with pytest.raises(KeyError, match=msg):
             ser2.to_frame().loc[box(ci2)]
 
-    def test_loc_getitem_many_missing_labels_inside_error_message_limited(self):
-        # GH#34272
-        n = 10000
-        missing_labels = [f"missing_{label}" for label in range(n)]
-        ser = Series({"a": 1, "b": 2, "c": 3})
-        # regex checks labels between 4 and 9995 are replaced with ellipses
-        error_message_regex = "missing_4.*\\.\\.\\..*missing_9995"
-        with pytest.raises(KeyError, match=error_message_regex):
-            ser.loc[["a", "c"] + missing_labels]
-
-    def test_loc_getitem_missing_labels_inside_matched_in_error_message(self):
-        # GH#34272
-        ser = Series({"a": 1, "b": 2, "c": 3})
-        error_message_regex = "missing_0.*missing_1.*missing_2"
-        with pytest.raises(KeyError, match=error_message_regex):
-            ser.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]]
-
-    def test_loc_getitem_long_text_missing_labels_inside_error_message_limited(self):
-        # GH#34272
-        ser = Series({"a": 1, "b": 2, "c": 3})
-        missing_labels = [f"long_missing_label_text_{i}" * 5 for i in range(3)]
-        # regex checks for very long labels there are new lines between each
-        error_message_regex = (
-            "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1"
-        )
-        with pytest.raises(KeyError, match=error_message_regex):
-            ser.loc[["a", "c"] + missing_labels]
-
     def test_loc_getitem_series_label_list_missing_values(self):
         # gh-11428
         key = np.array(
             ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64"
         )
         ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             ser.loc[key]
 
     def test_loc_getitem_series_label_list_missing_integer_values(self):
@@ -2307,7 +2274,7 @@ def test_loc_getitem_series_label_list_missing_integer_values(self):
             index=np.array([9730701000001104, 10049011000001109]),
             data=np.array([999000011000001104, 999000011000001104]),
         )
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             ser.loc[np.array([9730701000001104, 10047311000001102])]
 
     @pytest.mark.parametrize("to_period", [True, False])
@@ -2349,7 +2316,7 @@ def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period):
         if to_period:
             keys = [x.to_period("D") for x in keys]
 
-        with pytest.raises(KeyError, match="with any missing labels"):
+        with pytest.raises(KeyError, match="not in index"):
             ser.loc[keys]