pandas-dev · jreback · Jul 12, 2019 · Jul 12, 2019 · Jul 12, 2019
diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+import re
 
 import numpy as np
 import pytest
@@ -1120,9 +1121,10 @@ def test_raise_on_drop_duplicate_index(self, actual):
 
         # issue 19186
         level = 0 if isinstance(actual.index, MultiIndex) else None
-        with pytest.raises(KeyError):
+        msg = re.escape("\"['c'] not found in axis\"")
+        with pytest.raises(KeyError, match=msg):
             actual.drop("c", level=level, axis=0)
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=msg):
             actual.T.drop("c", level=level, axis=1)
         expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore")
         assert_frame_equal(expected_no_err, actual)

diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pytest
 
@@ -9,11 +11,12 @@
 def test_duplicated_with_misspelled_column_name(subset):
     # GH 19730
     df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]})
+    msg = re.escape("Index(['a'], dtype='object')")
 
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match=msg):
         df.duplicated(subset)
 
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match=msg):
         df.drop_duplicates(subset)
 
 

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -1,4 +1,5 @@
 from datetime import date, datetime, time, timedelta
+import re
 from warnings import catch_warnings, simplefilter
 
 import numpy as np
@@ -59,15 +60,16 @@ def test_getitem(self, float_frame):
         ad = np.random.randn(len(df))
         df["@awesome_domain"] = ad
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")):
             df.__getitem__('df["$10"]')
 
         res = df["@awesome_domain"]
         tm.assert_numpy_array_equal(ad, res.values)
 
     def test_getitem_dupe_cols(self):
         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
-        with pytest.raises(KeyError):
+        msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\""
+        with pytest.raises(KeyError, match=re.escape(msg)):
             df[["baf"]]
 
     def test_get(self, float_frame):
@@ -446,14 +448,16 @@ def test_getitem_setitem_ix_negative_integers(self, float_frame):
 
         df = DataFrame(np.random.randn(8, 4))
         # ix does label-based indexing when having an integer index
+        msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [index]\""
         with catch_warnings(record=True):
             simplefilter("ignore", FutureWarning)
-            with pytest.raises(KeyError):
+            with pytest.raises(KeyError, match=re.escape(msg)):
                 df.ix[[-1]]
 
+        msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [columns]\""
         with catch_warnings(record=True):
             simplefilter("ignore", FutureWarning)
-            with pytest.raises(KeyError):
+            with pytest.raises(KeyError, match=re.escape(msg)):
                 df.ix[:, [-1]]
 
         # #1942
@@ -497,7 +501,11 @@ def test_setitem(self, float_frame):
         float_frame["col6"] = series
         tm.assert_series_equal(series, float_frame["col6"], check_names=False)
 
-        with pytest.raises(KeyError):
+        msg = (
+            r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the"
+            r" \[columns\]\""
+        )
+        with pytest.raises(KeyError, match=msg):
             float_frame[np.random.randn(len(float_frame) + 1)] = 1
 
         # set ndarray
@@ -1884,10 +1892,10 @@ def test_lookup_bool(self):
         assert df["mask"].dtype == np.bool_
 
     def test_lookup_raises(self, float_frame):
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="'One or more row labels was not found'"):
             float_frame.lookup(["xyz"], ["A"])
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="'One or more column labels was not found'"):
             float_frame.lookup([float_frame.index[0]], ["xyz"])
 
         with pytest.raises(ValueError, match="same size"):
@@ -2543,7 +2551,9 @@ def test_xs(self, float_frame, datetime_frame):
         assert xs["A"] == 1
         assert xs["B"] == "1"
 
-        with pytest.raises(KeyError):
+        with pytest.raises(
+            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')")
+        ):
             datetime_frame.xs(datetime_frame.index[0] - BDay())
 
         # xs get column

diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pytest
 
@@ -88,9 +90,9 @@ def test_assign_dependent_old_python(self):
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
 
         # Key C does not exist at definition time of df
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="^'C'$"):
             df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"])
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="^'C'$"):
             df.assign(C=df.A, D=lambda x: x["A"] + x["C"])
 
     @pytest.mark.skipif(
@@ -219,14 +221,14 @@ def test_delitem_multiindex(self):
         # A still in the levels, BUT get a KeyError if trying
         # to delete
         assert ("A",) not in df.columns
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=re.escape("('A',)")):
             del df[("A",)]
 
         # behavior of dropped/deleted MultiIndex levels changed from
         # GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
         # levels which are dropped/deleted
         assert "A" not in df.columns
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=re.escape("('A',)")):
             del df["A"]
 
     def test_pop(self, float_frame):

diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
@@ -618,7 +618,9 @@ def test_sample(sel):
             df.sample(n=1, weights="weight_column", axis=1)
 
         # Check weighting key error
-        with pytest.raises(KeyError):
+        with pytest.raises(
+            KeyError, match="'String passed to weights not a valid column'"
+        ):
             df.sample(n=3, weights="not_a_real_column_name")
 
         # Check that re-normalizes weights that don't sum to one.

diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -206,7 +206,7 @@ def test_timegrouper_with_reg_groups(self):
             result = df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum()
             assert_frame_equal(result, expected)
 
-            with pytest.raises(KeyError):
+            with pytest.raises(KeyError, match="'The grouper name foo is not found'"):
                 df.groupby([pd.Grouper(freq="1M", key="foo"), "Buyer"]).sum()
 
             # passing the level

diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -614,7 +614,7 @@ def test_get_loc(self):
         )
         with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
             idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo")
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="'2000-01-01T03'"):
             idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours")
         with pytest.raises(
             ValueError, match="tolerance size must match target index size"
@@ -634,12 +634,12 @@ def test_get_loc(self):
         assert idx.get_loc("1999", method="nearest") == 0
         assert idx.get_loc("2001", method="nearest") == 2
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="'1999'"):
             idx.get_loc("1999", method="pad")
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="'2001'"):
             idx.get_loc("2001", method="backfill")
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="'foobar'"):
             idx.get_loc("foobar")
         with pytest.raises(TypeError):
             idx.get_loc(slice(2))

diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
@@ -445,7 +445,7 @@ def test_get_loc_length_one_scalar(self, scalar, closed):
             result = index.get_loc(scalar)
             assert result == 0
         else:
-            with pytest.raises(KeyError):
+            with pytest.raises(KeyError, match=re.escape(str(scalar))):
                 index.get_loc(scalar)
 
     @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
@@ -458,7 +458,14 @@ def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
             result = index.get_loc(interval)
             assert result == 0
         else:
-            with pytest.raises(KeyError):
+            with pytest.raises(
+                KeyError,
+                match=re.escape(
+                    "Interval({left}, {right}, closed='{other_closed}')".format(
+                        left=left, right=right, other_closed=other_closed
+                    )
+                ),
+            ):
                 index.get_loc(interval)
 
     # Make consistent with test_interval_new.py (see #16316, #16386)

diff --git a/pandas/tests/indexes/interval/test_interval_new.py b/pandas/tests/indexes/interval/test_interval_new.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pytest
 
@@ -15,16 +17,21 @@ def test_get_loc_interval(self, closed, side):
         for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
             # if get_loc is supplied an interval, it should only search
             # for exact matches, not overlaps or covers, else KeyError.
+            msg = re.escape(
+                "Interval({bound[0]}, {bound[1]}, closed='{side}')".format(
+                    bound=bound, side=side
+                )
+            )
             if closed == side:
                 if bound == [0, 1]:
                     assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                 elif bound == [2, 3]:
                     assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                 else:
-                    with pytest.raises(KeyError):
+                    with pytest.raises(KeyError, match=msg):
                         idx.get_loc(Interval(*bound, closed=side))
             else:
-                with pytest.raises(KeyError):
+                with pytest.raises(KeyError, match=msg):
                     idx.get_loc(Interval(*bound, closed=side))
 
     @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
@@ -81,18 +88,27 @@ def test_slice_locs_with_interval(self):
         # unsorted duplicates
         index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
 
-        with pytest.raises(KeyError):
+        # (0, 2] appears twice in the index, so it cannot serve as a slice bound
+        left_msg = re.escape(
+            '"Cannot get left slice bound for non-unique label:'
+            " Interval(0, 2, closed='right')\""
+        )
+        right_msg = re.escape(
+            '"Cannot get right slice bound for non-unique label:'
+            " Interval(0, 2, closed='right')\""
+        )
+        with pytest.raises(KeyError, match=left_msg):
             index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=left_msg):
             index.slice_locs(start=Interval(0, 2))
 
         assert index.slice_locs(end=Interval(2, 4)) == (0, 2)
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=right_msg):
             index.slice_locs(end=Interval(0, 2))
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match=right_msg):
             index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))
 
         # another unsorted duplicates
@@ -139,7 +155,13 @@ def test_slice_locs_with_ints_and_floats_succeeds(self):
     def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
         start, stop = query
         index = IntervalIndex.from_tuples(tuples)
-        with pytest.raises(KeyError):
+        with pytest.raises(
+            KeyError,
+            match=(
+                "'can only get slices from an IntervalIndex if bounds are"
+                " non-overlapping and all monotonic increasing or decreasing'"
+            ),
+        ):
             index.slice_locs(start, stop)
 
     @pytest.mark.parametrize(

diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py
@@ -62,15 +62,17 @@ def test_get_loc(self, tree):
         expected = np.array([0, 1], dtype="intp")
         tm.assert_numpy_array_equal(result, expected)
 
-        with pytest.raises(KeyError):
+        with pytest.raises(KeyError, match="-1"):
             tree.get_loc(-1)
 
     def test_get_indexer(self, tree):
         result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
         expected = np.array([0, 4, -1], dtype="intp")
         tm.assert_numpy_array_equal(result, expected)
 
-        with pytest.raises(KeyError):
+        with pytest.raises(
+            KeyError, match="'indexer does not intersect a unique set of intervals'"
+        ):
             tree.get_indexer(np.array([3.0]))
 
     def test_get_indexer_non_unique(self, tree):
@@ -100,7 +102,9 @@ def test_duplicates(self, dtype):
         expected = np.array([0, 1, 2], dtype="intp")
         tm.assert_numpy_array_equal(result, expected)
 
-        with pytest.raises(KeyError):
+        with pytest.raises(
+            KeyError, match="'indexer does not intersect a unique set of intervals'"
+        ):
             tree.get_indexer(np.array([0.5]))
 
         indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
@@ -116,7 +120,7 @@ def test_get_loc_closed(self, closed):
         tree = IntervalTree([0], [1], closed=closed)
         for p, errors in [(0, tree.open_left), (1, tree.open_right)]:
             if errors:
-                with pytest.raises(KeyError):
+                with pytest.raises(KeyError, match=str(p)):
                     tree.get_loc(p)
             else:
                 result = tree.get_loc(p)

diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py
@@ -54,7 +54,7 @@ def test_partial_string_timestamp_multiindex():
     # ambiguous and we don't want to extend this behavior forward to work
     # in multi-indexes. This would amount to selecting a scalar from a
     # column.
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match="'2016-01-01'"):
         df["2016-01-01"]
 
     # partial string match on year only
@@ -83,7 +83,7 @@ def test_partial_string_timestamp_multiindex():
     tm.assert_frame_equal(result, expected)
 
     # Slicing date on first level should break (of course)
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match="'2016-01-01'"):
         df_swap.loc["2016-01-01"]
 
     # GH12685 (partial string with daily resolution or below)

diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py
@@ -115,7 +115,7 @@ def test_unsortedindex():
     df.sort_index(inplace=True)
     assert len(df.loc(axis=0)["z", :]) == 2
 
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match="'q'"):
         df.loc(axis=0)["q", :]
 
 

diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py
@@ -200,7 +200,7 @@ def test_getitem_day(self):
 
             invalid = ["2013/02/01 9H", "2013/02/01 09:00"]
             for v in invalid:
-                with pytest.raises(KeyError):
+                with pytest.raises(KeyError, match=v):
                     s[v]