pandas-dev · WillAyd · Jan 20, 2019 · Jan 18, 2019 · Jan 18, 2019 · Jan 18, 2019
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -438,7 +438,7 @@ def get_converter(s):
                     return [self.indices[name] for name in names]
                 except KeyError:
                     # turns out it wasn't a tuple
-                    msg = ("must supply a a same-length tuple to get_group"
+                    msg = ("must supply a same-length tuple to get_group"
                            " with multiple grouping keys")
                     raise ValueError(msg)
 

diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
@@ -73,15 +73,18 @@ def test_generate_bins(self):
             bins = func(values, binner, closed='right')
             assert ((bins == np.array([3, 6])).all())
 
-        pytest.raises(ValueError, generate_bins_generic, values, [],
-                      'right')
-        pytest.raises(ValueError, generate_bins_generic, values[:0],
-                      binner, 'right')
-
-        pytest.raises(ValueError, generate_bins_generic, values, [4],
-                      'right')
-        pytest.raises(ValueError, generate_bins_generic, values, [-3, -1],
-                      'right')
+        msg = "Invalid length for values or for binner"
+        with pytest.raises(ValueError, match=msg):
+            generate_bins_generic(values, [], 'right')
+        with pytest.raises(ValueError, match=msg):
+            generate_bins_generic(values[:0], binner, 'right')
+
+        msg = "Values falls before first bin"
+        with pytest.raises(ValueError, match=msg):
+            generate_bins_generic(values, [4], 'right')
+        msg = "Values falls after last bin"
+        with pytest.raises(ValueError, match=msg):
+            generate_bins_generic(values, [-3, -1], 'right')
 
 
 def test_group_ohlc():

diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
@@ -116,8 +116,9 @@ def raise_if_sum_is_zero(x):
     s = pd.Series([-1, 0, 1, 2])
     grouper = s.apply(lambda x: x % 2)
     grouped = s.groupby(grouper)
-    pytest.raises(TypeError,
-                  lambda: grouped.filter(raise_if_sum_is_zero))
+    msg = "the filter must return a boolean result"
+    with pytest.raises(TypeError, match=msg):
+        grouped.filter(raise_if_sum_is_zero)
 
 
 def test_filter_with_axis_in_groupby():
@@ -140,16 +141,28 @@ def test_filter_bad_shapes():
     g_s = s.groupby(s)
 
     f = lambda x: x
-    pytest.raises(TypeError, lambda: g_df.filter(f))
-    pytest.raises(TypeError, lambda: g_s.filter(f))
+    msg = "filter function returned a DataFrame, but expected a scalar bool"
+    with pytest.raises(TypeError, match=msg):
+        g_df.filter(f)
+    msg = "the filter must return a boolean result"
+    with pytest.raises(TypeError, match=msg):
+        g_s.filter(f)
 
     f = lambda x: x == 1
-    pytest.raises(TypeError, lambda: g_df.filter(f))
-    pytest.raises(TypeError, lambda: g_s.filter(f))
+    msg = "filter function returned a DataFrame, but expected a scalar bool"
+    with pytest.raises(TypeError, match=msg):
+        g_df.filter(f)
+    msg = "the filter must return a boolean result"
+    with pytest.raises(TypeError, match=msg):
+        g_s.filter(f)
 
     f = lambda x: np.outer(x, x)
-    pytest.raises(TypeError, lambda: g_df.filter(f))
-    pytest.raises(TypeError, lambda: g_s.filter(f))
+    msg = "can't multiply sequence by non-int of type 'str'"
+    with pytest.raises(TypeError, match=msg):
+        g_df.filter(f)
+    msg = "the filter must return a boolean result"
+    with pytest.raises(TypeError, match=msg):
+        g_s.filter(f)
 
 
 def test_filter_nan_is_false():

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -761,8 +761,11 @@ def test_frame_describe_tupleindex():
                      'z': [100, 200, 300, 400, 500] * 3})
     df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5
     df2 = df1.rename(columns={'k': 'key'})
-    pytest.raises(ValueError, lambda: df1.groupby('k').describe())
-    pytest.raises(ValueError, lambda: df2.groupby('key').describe())
+    msg = "Names should be list-like for a MultiIndex"
+    with pytest.raises(ValueError, match=msg):
+        df1.groupby('k').describe()
+    with pytest.raises(ValueError, match=msg):
+        df2.groupby('key').describe()
 
 
 def test_frame_describe_unstacked_format():

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -71,7 +71,10 @@ def test_basic(dtype):
     assert agged[1] == 21
 
     # corner cases
-    pytest.raises(Exception, grouped.aggregate, lambda x: x * 2)
+    msg = "Must produce aggregated value"
+    # the exception raised here is a plain Exception, not a subclass
+    with pytest.raises(Exception, match=msg):
+        grouped.aggregate(lambda x: x * 2)
 
 
 def test_groupby_nonobject_dtype(mframe, df_mixed_floats):
@@ -330,12 +333,17 @@ def f3(x):
     assert_frame_equal(result1, result2)
 
     # should fail (not the same number of levels)
-    pytest.raises(AssertionError, df.groupby('a').apply, f2)
-    pytest.raises(AssertionError, df2.groupby('a').apply, f2)
+    msg = "Cannot concat indices that do not have the same number of levels"
+    with pytest.raises(AssertionError, match=msg):
+        df.groupby('a').apply(f2)
+    with pytest.raises(AssertionError, match=msg):
+        df2.groupby('a').apply(f2)
 
     # should fail (incorrect shape)
-    pytest.raises(AssertionError, df.groupby('a').apply, f3)
-    pytest.raises(AssertionError, df2.groupby('a').apply, f3)
+    with pytest.raises(AssertionError, match=msg):
+        df.groupby('a').apply(f3)
+    with pytest.raises(AssertionError, match=msg):
+        df2.groupby('a').apply(f3)
 
 
 def test_attr_wrapper(ts):
@@ -356,7 +364,9 @@ def test_attr_wrapper(ts):
     expected = grouped.agg(lambda x: x.dtype)
 
     # make sure raises error
-    pytest.raises(AttributeError, getattr, grouped, 'foo')
+    msg = "'SeriesGroupBy' object has no attribute 'foo'"
+    with pytest.raises(AttributeError, match=msg):
+        getattr(grouped, 'foo')
 
 
 def test_frame_groupby(tsframe):
@@ -664,11 +674,13 @@ def test_groupby_as_index_series_scalar(df):
 
 
 def test_groupby_as_index_corner(df, ts):
-    pytest.raises(TypeError, ts.groupby, lambda x: x.weekday(),
-                  as_index=False)
+    msg = "as_index=False only valid with DataFrame"
+    with pytest.raises(TypeError, match=msg):
+        ts.groupby(lambda x: x.weekday(), as_index=False)
 
-    pytest.raises(ValueError, df.groupby, lambda x: x.lower(),
-                  as_index=False, axis=1)
+    msg = "as_index=False only valid for axis=0"
+    with pytest.raises(ValueError, match=msg):
+        df.groupby(lambda x: x.lower(), as_index=False, axis=1)
 
 
 def test_groupby_multiple_key(df):
@@ -722,8 +734,11 @@ def test_omit_nuisance(df):
 
     # won't work with axis = 1
     grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1)
-    result = pytest.raises(TypeError, grouped.agg,
-                           lambda x: x.sum(0, numeric_only=False))
+    msg = (r'\("unsupported operand type\(s\) for \+: '
+           "'Timestamp' and 'float'"
+           '"'r", 'occurred at index 0'\)")
+    with pytest.raises(TypeError, match=msg):
+        grouped.agg(lambda x: x.sum(0, numeric_only=False))
 
 
 def test_omit_nuisance_python_multiple(three_group):
@@ -756,7 +771,9 @@ def test_empty_groups_corner(mframe):
 
 def test_nonsense_func():
     df = DataFrame([0])
-    pytest.raises(Exception, df.groupby, lambda x: x + 'foo')
+    msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'"
+    with pytest.raises(TypeError, match=msg):
+        df.groupby(lambda x: x + 'foo')
 
 
 def test_wrap_aggregated_output_multindex(mframe):
@@ -823,12 +840,22 @@ def test_groupby_level_nonmulti():
     result = s.groupby(level=[-1]).sum()
     tm.assert_series_equal(result, expected)
 
-    pytest.raises(ValueError, s.groupby, level=1)
-    pytest.raises(ValueError, s.groupby, level=-2)
-    pytest.raises(ValueError, s.groupby, level=[])
-    pytest.raises(ValueError, s.groupby, level=[0, 0])
-    pytest.raises(ValueError, s.groupby, level=[0, 1])
-    pytest.raises(ValueError, s.groupby, level=[1])
+    msg = "level > 0 or level < -1 only valid with  MultiIndex"
+    with pytest.raises(ValueError, match=msg):
+        s.groupby(level=1)
+    with pytest.raises(ValueError, match=msg):
+        s.groupby(level=-2)
+    msg = "No group keys passed!"
+    with pytest.raises(ValueError, match=msg):
+        s.groupby(level=[])
+    msg = "multiple levels only valid with MultiIndex"
+    with pytest.raises(ValueError, match=msg):
+        s.groupby(level=[0, 0])
+    with pytest.raises(ValueError, match=msg):
+        s.groupby(level=[0, 1])
+    msg = "level > 0 or level < -1 only valid with  MultiIndex"
+    with pytest.raises(ValueError, match=msg):
+        s.groupby(level=[1])
 
 
 def test_groupby_complex():
@@ -1101,7 +1128,8 @@ def test_groupby_list_infer_array_like(df):
     expected = df.groupby(df['A']).mean()
     assert_frame_equal(result, expected, check_names=False)
 
-    pytest.raises(Exception, df.groupby, list(df['A'][:-1]))
+    with pytest.raises(KeyError, match=r"^'foo'$"):
+        df.groupby(list(df['A'][:-1]))
 
     # pathological case of ambiguity
     df = DataFrame({'foo': [0, 1],
@@ -1128,10 +1156,13 @@ def test_groupby_keys_same_size_as_index():
 
 def test_groupby_one_row():
     # GH 11741
+    msg = r"^'Z'$"
     df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD'))
-    pytest.raises(KeyError, df1.groupby, 'Z')
+    with pytest.raises(KeyError, match=msg):
+        df1.groupby('Z')
     df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD'))
-    pytest.raises(KeyError, df2.groupby, 'Z')
+    with pytest.raises(KeyError, match=msg):
+        df2.groupby('Z')
 
 
 def test_groupby_nat_exclude():
@@ -1169,7 +1200,8 @@ def test_groupby_nat_exclude():
     tm.assert_frame_equal(
         grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]])
 
-    pytest.raises(KeyError, grouped.get_group, pd.NaT)
+    with pytest.raises(KeyError, match=r"^NaT$"):
+        grouped.get_group(pd.NaT)
 
     nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan],
                         'nat': [pd.NaT, pd.NaT, pd.NaT]})
@@ -1181,8 +1213,10 @@ def test_groupby_nat_exclude():
         assert grouped.groups == {}
         assert grouped.ngroups == 0
         assert grouped.indices == {}
-        pytest.raises(KeyError, grouped.get_group, np.nan)
-        pytest.raises(KeyError, grouped.get_group, pd.NaT)
+        with pytest.raises(KeyError, match=r"^nan$"):
+            grouped.get_group(np.nan)
+        with pytest.raises(KeyError, match=r"^NaT$"):
+            grouped.get_group(pd.NaT)
 
 
 @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
@@ -1643,7 +1677,7 @@ def test_pivot_table_values_key_error():
     df['year'] = df.set_index('eventDate').index.year
     df['month'] = df.set_index('eventDate').index.month
 
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match="'badname'"):
         df.reset_index().pivot_table(index='year', columns='month',
                                      values='badname', aggfunc='count')
 
@@ -1689,7 +1723,7 @@ def test_tuple_correct_keyerror():
     df = pd.DataFrame(1, index=range(3),
                       columns=pd.MultiIndex.from_product([[1, 2],
                                                           [3, 4]]))
-    with pytest.raises(KeyError, match="(7, 8)"):
+    with pytest.raises(KeyError, match=r"^\(7, 8\)$"):
         df.groupby((7, 8)).mean()
 
 

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -26,9 +26,9 @@ class TestSelection(object):
     def test_select_bad_cols(self):
         df = DataFrame([[1, 2]], columns=['A', 'B'])
         g = df.groupby('A')
-        pytest.raises(KeyError, g.__getitem__, ['C'])  # g[['C']]
+        with pytest.raises(KeyError, match='"Columns not found: '"'C'"'"'):
+            g[['C']]
 
-        pytest.raises(KeyError, g.__getitem__, ['A', 'C'])  # g[['A', 'C']]
         with pytest.raises(KeyError, match='^[^A]+$'):
             # A should not be referenced as a bad column...
             # will have to rethink regex if you change message!
@@ -39,8 +39,11 @@ def test_groupby_duplicated_column_errormsg(self):
         df = DataFrame(columns=['A', 'B', 'A', 'C'],
                        data=[range(4), range(2, 6), range(0, 8, 2)])
 
-        pytest.raises(ValueError, df.groupby, 'A')
-        pytest.raises(ValueError, df.groupby, ['A', 'B'])
+        msg = "Grouper for 'A' not 1-dimensional"
+        with pytest.raises(ValueError, match=msg):
+            df.groupby('A')
+        with pytest.raises(ValueError, match=msg):
+            df.groupby(['A', 'B'])
 
         grouped = df.groupby('B')
         c = grouped.count()
@@ -304,7 +307,8 @@ def test_grouper_iter(self, df):
 
     def test_empty_groups(self, df):
         # see gh-1048
-        pytest.raises(ValueError, df.groupby, [])
+        with pytest.raises(ValueError, match="No group keys passed!"):
+            df.groupby([])
 
     def test_groupby_grouper(self, df):
         grouped = df.groupby('A')
@@ -345,11 +349,15 @@ def test_groupby_grouper_f_sanity_checked(self):
         # when the elements are Timestamp.
         # the result is Index[0:6], very confusing.
 
-        pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6])
+        msg = r"Grouper result violates len\(labels\) == len\(data\)"
+        with pytest.raises(AssertionError, match=msg):
+            ts.groupby(lambda key: key[0:6])
 
     def test_grouping_error_on_multidim_input(self, df):
-        pytest.raises(ValueError,
-                      Grouping, df.index, df[['A', 'A']])
+        msg = ("Grouper for '<class 'pandas.core.frame.DataFrame'>'"
+               " not 1-dimensional")
+        with pytest.raises(ValueError, match=msg):
+            Grouping(df.index, df[['A', 'A']])
 
     def test_multiindex_passthru(self):
 
@@ -470,14 +478,18 @@ def test_groupby_level(self, sort, mframe, df):
         assert_frame_equal(result1, expected1.T)
 
         # raise exception for non-MultiIndex
-        pytest.raises(ValueError, df.groupby, level=1)
+        msg = "level > 0 or level < -1 only valid with  MultiIndex"
+        with pytest.raises(ValueError, match=msg):
+            df.groupby(level=1)
 
     def test_groupby_level_index_names(self):
         # GH4014 this used to raise ValueError since 'exp'>1 (in py2)
         df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3,
                         'var1': lrange(6), }).set_index('exp')
         df.groupby(level='exp')
-        pytest.raises(ValueError, df.groupby, level='foo')
+        msg = "level name foo is not the name of the index"
+        with pytest.raises(ValueError, match=msg):
+            df.groupby(level='foo')
 
     @pytest.mark.parametrize('sort', [True, False])
     def test_groupby_level_with_nas(self, sort):
@@ -588,10 +600,15 @@ def test_get_group(self):
         assert_frame_equal(result1, result3)
 
         # must pass a same-length tuple with multiple keys
-        pytest.raises(ValueError, lambda: g.get_group('foo'))
-        pytest.raises(ValueError, lambda: g.get_group(('foo')))
-        pytest.raises(ValueError,
-                      lambda: g.get_group(('foo', 'bar', 'baz')))
+        msg = "must supply a tuple to get_group with multiple grouping keys"
+        with pytest.raises(ValueError, match=msg):
+            g.get_group('foo')
+        with pytest.raises(ValueError, match=msg):
+            g.get_group(('foo'))
+        msg = ("must supply a same-length tuple to get_group with multiple"
+               " grouping keys")
+        with pytest.raises(ValueError, match=msg):
+            g.get_group(('foo', 'bar', 'baz'))
 
     def test_get_group_empty_bins(self, observed):
 
@@ -605,7 +622,9 @@ def test_get_group_empty_bins(self, observed):
         expected = DataFrame([3, 1], index=[0, 1])
         assert_frame_equal(result, expected)
 
-        pytest.raises(KeyError, lambda: g.get_group(pd.Interval(10, 15)))
+        msg = r"Interval\(10, 15, closed='right'\)"
+        with pytest.raises(KeyError, match=msg):
+            g.get_group(pd.Interval(10, 15))
 
     def test_get_group_grouped_by_tuple(self):
         # GH 8121