pandas-dev · jreback · Jan 19, 2019 · Jan 19, 2019 · Jan 19, 2019 · Jan 19, 2019
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1087,7 +1087,7 @@ def _validate(self, validate):
         elif validate in ["one_to_many", "1:m"]:
             if not left_unique:
                 raise MergeError("Merge keys are not unique in left dataset;"
-                                 "not a one-to-many merge")
+                                 " not a one-to-many merge")
 
         elif validate in ["many_to_one", "m:1"]:
             if not right_unique:

diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
@@ -195,38 +195,47 @@ def test_join_on(self):
         assert np.isnan(joined['three']['c'])
 
-        # merge column not p resent
+        # merge column not present
-        pytest.raises(KeyError, target.join, source, on='E')
+        with pytest.raises(KeyError, match="^'E'$"):
+            target.join(source, on='E')
 
         # overlap
         source_copy = source.copy()
         source_copy['A'] = 0
-        pytest.raises(ValueError, target.join, source_copy, on='A')
+        msg = ("You are trying to merge on float64 and object columns. If"
+               " you wish to proceed you should use pd.concat")
+        with pytest.raises(ValueError, match=msg):
+            target.join(source_copy, on='A')
 
     def test_join_on_fails_with_different_right_index(self):
-        with pytest.raises(ValueError):
-            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
-                            'b': np.random.randn(3)})
-            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
-                             'b': np.random.randn(10)},
-                            index=tm.makeCustomIndex(10, 2))
+        df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
+                        'b': np.random.randn(3)})
+        df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
+                         'b': np.random.randn(10)},
+                        index=tm.makeCustomIndex(10, 2))
+        msg = (r'len\(left_on\) must equal the number of levels in the index'
+               ' of "right"')
+        with pytest.raises(ValueError, match=msg):
             merge(df, df2, left_on='a', right_index=True)
 
     def test_join_on_fails_with_different_left_index(self):
-        with pytest.raises(ValueError):
-            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
-                            'b': np.random.randn(3)},
-                           index=tm.makeCustomIndex(10, 2))
-            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
-                             'b': np.random.randn(10)})
+        df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
+                        'b': np.random.randn(3)},
+                       index=tm.makeCustomIndex(3, 2))
+        df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
+                         'b': np.random.randn(10)})
+        msg = (r'len\(right_on\) must equal the number of levels in the index'
+               ' of "left"')
+        with pytest.raises(ValueError, match=msg):
             merge(df, df2, right_on='b', left_index=True)
 
     def test_join_on_fails_with_different_column_counts(self):
-        with pytest.raises(ValueError):
-            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
-                            'b': np.random.randn(3)})
-            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
-                             'b': np.random.randn(10)},
-                            index=tm.makeCustomIndex(10, 2))
+        df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
+                        'b': np.random.randn(3)})
+        df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
+                         'b': np.random.randn(10)},
+                        index=tm.makeCustomIndex(10, 2))
+        msg = r"len\(right_on\) must equal len\(left_on\)"
+        with pytest.raises(ValueError, match=msg):
             merge(df, df2, right_on='a', left_on=['a', 'b'])
 
     @pytest.mark.parametrize("wrong_type", [2, 'str', None, np.array([0, 1])])
@@ -237,9 +246,11 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type):
         # Edited test to remove the Series object from test parameters
 
         df = DataFrame({'a': [1, 1]})
-        with pytest.raises(TypeError, match=str(type(wrong_type))):
+        msg = ("Can only merge Series or DataFrame objects, a {} was passed"
+               .format(str(type(wrong_type))))
+        with pytest.raises(TypeError, match=msg):
             merge(wrong_type, df, left_on='a', right_on='a')
-        with pytest.raises(TypeError, match=str(type(wrong_type))):
+        with pytest.raises(TypeError, match=msg):
             merge(df, wrong_type, left_on='a', right_on='a')
 
     def test_join_on_pass_vector(self):
@@ -603,7 +614,9 @@ def _check_diff_index(df_list, result, exp_index):
         joined = df_list[0].join(df_list[1:], how='inner')
         _check_diff_index(df_list, joined, df.index[2:8])
 
-        pytest.raises(ValueError, df_list[0].join, df_list[1:], on='a')
+        msg = "Joining multiple DataFrames only supported for joining on index"
+        with pytest.raises(ValueError, match=msg):
+            df_list[0].join(df_list[1:], on='a')
 
     def test_join_many_mixed(self):
         df = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
@@ -725,10 +738,13 @@ def test_panel_join_many(self):
             tm.assert_panel_equal(joined, expected)
 
             # edge cases
-            pytest.raises(ValueError, panels[0].join, panels[1:],
-                          how='outer', lsuffix='foo', rsuffix='bar')
-            pytest.raises(ValueError, panels[0].join, panels[1:],
-                          how='right')
+            msg = "Suffixes not supported when passing multiple panels"
+            with pytest.raises(ValueError, match=msg):
+                panels[0].join(panels[1:], how='outer', lsuffix='foo',
+                               rsuffix='bar')
+            msg = "Right join not supported with multiple panels"
+            with pytest.raises(ValueError, match=msg):
+                panels[0].join(panels[1:], how='right')
 
     def test_join_multi_to_multi(self, join_type):
         # GH 20475
@@ -749,10 +765,12 @@ def test_join_multi_to_multi(self, join_type):
                     )
         assert_frame_equal(expected, result)
 
-        with pytest.raises(ValueError):
+        msg = (r'len\(left_on\) must equal the number of levels in the index'
+               ' of "right"')
+        with pytest.raises(ValueError, match=msg):
             left.join(right, on='xy', how=join_type)
 
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match=msg):
             right.join(left, on=['abc', 'xy'], how=join_type)
 
 

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -119,25 +119,37 @@ def test_merge_index_singlekey_inner(self):
         assert_frame_equal(result, expected.loc[:, result.columns])
 
     def test_merge_misspecified(self):
-        pytest.raises(ValueError, merge, self.left, self.right,
-                      left_index=True)
-        pytest.raises(ValueError, merge, self.left, self.right,
-                      right_index=True)
-
-        pytest.raises(ValueError, merge, self.left, self.left,
-                      left_on='key', on='key')
-
-        pytest.raises(ValueError, merge, self.df, self.df2,
-                      left_on=['key1'], right_on=['key1', 'key2'])
+        msg = "Must pass right_on or right_index=True"
+        with pytest.raises(pd.errors.MergeError, match=msg):
+            merge(self.left, self.right, left_index=True)
+        msg = "Must pass left_on or left_index=True"
+        with pytest.raises(pd.errors.MergeError, match=msg):
+            merge(self.left, self.right, right_index=True)
+
+        msg = ('Can only pass argument "on" OR "left_on" and "right_on", not'
+               ' a combination of both')
+        with pytest.raises(pd.errors.MergeError, match=msg):
+            merge(self.left, self.left, left_on='key', on='key')
+
+        msg = r"len\(right_on\) must equal len\(left_on\)"
+        with pytest.raises(ValueError, match=msg):
+            merge(self.df, self.df2, left_on=['key1'],
+                  right_on=['key1', 'key2'])
 
     def test_index_and_on_parameters_confusion(self):
-        pytest.raises(ValueError, merge, self.df, self.df2, how='left',
-                      left_index=False, right_index=['key1', 'key2'])
-        pytest.raises(ValueError, merge, self.df, self.df2, how='left',
-                      left_index=['key1', 'key2'], right_index=False)
-        pytest.raises(ValueError, merge, self.df, self.df2, how='left',
-                      left_index=['key1', 'key2'],
-                      right_index=['key1', 'key2'])
+        msg = ("right_index parameter must be of type bool, not"
+               r" <(class|type) 'list'>")
+        with pytest.raises(ValueError, match=msg):
+            merge(self.df, self.df2, how='left',
+                  left_index=False, right_index=['key1', 'key2'])
+        msg = ("left_index parameter must be of type bool, not "
+               r"<(class|type) 'list'>")
+        with pytest.raises(ValueError, match=msg):
+            merge(self.df, self.df2, how='left',
+                  left_index=['key1', 'key2'], right_index=False)
+        with pytest.raises(ValueError, match=msg):
+            merge(self.df, self.df2, how='left',
+                  left_index=['key1', 'key2'], right_index=['key1', 'key2'])
 
     def test_merge_overlap(self):
         merged = merge(self.left, self.left, on='key')
@@ -269,7 +281,6 @@ def test_no_overlap_more_informative_error(self):
         df1 = DataFrame({'x': ['a']}, index=[dt])
 
         df2 = DataFrame({'y': ['b', 'c']}, index=[dt, dt])
-        pytest.raises(MergeError, merge, df1, df2)
 
         msg = ('No common columns to perform merge on. '
                'Merge options: left_on={lon}, right_on={ron}, '
@@ -566,7 +577,10 @@ def test_overlapping_columns_error_message(self):
 
         # #2649, #10639
         df2.columns = ['key1', 'foo', 'foo']
-        pytest.raises(ValueError, merge, df, df2)
+        msg = (r"Data columns not unique: Index\(\[u?'foo', u?'foo'\],"
+               r" dtype='object'\)")
+        with pytest.raises(MergeError, match=msg):
+            merge(df, df2)
 
     def test_merge_on_datetime64tz(self):
 
@@ -717,9 +731,10 @@ def test_indicator(self):
         assert_frame_equal(test_custom_name, df_result_custom_name)
 
         # Check only accepts strings and booleans
-        with pytest.raises(ValueError):
+        msg = "indicator option can only accept boolean or string arguments"
+        with pytest.raises(ValueError, match=msg):
             merge(df1, df2, on='col1', how='outer', indicator=5)
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match=msg):
             df1.merge(df2, on='col1', how='outer', indicator=5)
 
         # Check result integrity
@@ -743,20 +758,25 @@ def test_indicator(self):
         for i in ['_right_indicator', '_left_indicator', '_merge']:
             df_badcolumn = DataFrame({'col1': [1, 2], i: [2, 2]})
 
-            with pytest.raises(ValueError):
+            msg = ("Cannot use `indicator=True` option when data contains a"
+                   " column named {}|"
+                   "Cannot use name of an existing column for indicator"
+                   " column").format(i)
+            with pytest.raises(ValueError, match=msg):
                 merge(df1, df_badcolumn, on='col1',
                       how='outer', indicator=True)
-            with pytest.raises(ValueError):
+            with pytest.raises(ValueError, match=msg):
                 df1.merge(df_badcolumn, on='col1', how='outer', indicator=True)
 
         # Check for name conflict with custom name
         df_badcolumn = DataFrame(
             {'col1': [1, 2], 'custom_column_name': [2, 2]})
 
-        with pytest.raises(ValueError):
+        msg = "Cannot use name of an existing column for indicator column"
+        with pytest.raises(ValueError, match=msg):
             merge(df1, df_badcolumn, on='col1', how='outer',
                   indicator='custom_column_name')
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match=msg):
             df1.merge(df_badcolumn, on='col1', how='outer',
                       indicator='custom_column_name')
 
@@ -843,11 +863,13 @@ def test_validation(self):
         merge(left, right_w_dups, left_index=True, right_index=True,
               validate='one_to_many')
 
-        with pytest.raises(MergeError):
+        msg = ("Merge keys are not unique in right dataset; not a one-to-one"
+               " merge")
+        with pytest.raises(MergeError, match=msg):
             merge(left, right_w_dups, left_index=True, right_index=True,
                   validate='one_to_one')
 
-        with pytest.raises(MergeError):
+        with pytest.raises(MergeError, match=msg):
             merge(left, right_w_dups, on='a', validate='one_to_one')
 
         # Dups on left
@@ -856,26 +878,33 @@ def test_validation(self):
         merge(left_w_dups, right, left_index=True, right_index=True,
               validate='many_to_one')
 
-        with pytest.raises(MergeError):
+        msg = ("Merge keys are not unique in left dataset; not a one-to-one"
+               " merge")
+        with pytest.raises(MergeError, match=msg):
             merge(left_w_dups, right, left_index=True, right_index=True,
                   validate='one_to_one')
 
-        with pytest.raises(MergeError):
+        with pytest.raises(MergeError, match=msg):
             merge(left_w_dups, right, on='a', validate='one_to_one')
 
         # Dups on both
         merge(left_w_dups, right_w_dups, on='a', validate='many_to_many')
 
-        with pytest.raises(MergeError):
+        msg = ("Merge keys are not unique in right dataset; not a many-to-one"
+               " merge")
+        with pytest.raises(MergeError, match=msg):
             merge(left_w_dups, right_w_dups, left_index=True,
                   right_index=True, validate='many_to_one')
 
-        with pytest.raises(MergeError):
+        msg = ("Merge keys are not unique in left dataset; not a one-to-many"
+               " merge")
+        with pytest.raises(MergeError, match=msg):
             merge(left_w_dups, right_w_dups, on='a',
                   validate='one_to_many')
 
         # Check invalid arguments
-        with pytest.raises(ValueError):
+        msg = "Not a valid argument for validate"
+        with pytest.raises(ValueError, match=msg):
             merge(left, right, on='a', validate='jibberish')
 
         # Two column merge, dups in both, but jointly no dups.
@@ -896,7 +925,9 @@ def test_validation(self):
                                           'um... weasel noise?']},
                                    index=range(3))
 
-        with pytest.raises(MergeError):
+        msg = ("Merge keys are not unique in either left or right dataset;"
+               " not a one-to-one merge")
+        with pytest.raises(MergeError, match=msg):
             merge(left, right, on='a', validate='1:1')
 
         result = merge(left, right, on=['a', 'b'], validate='1:1')
@@ -1439,6 +1470,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
                           left_index=left_index, right_index=right_index)
         tm.assert_frame_equal(result, expected)
     else:
-        with pytest.raises(ValueError, match='a Series without a name'):
+        msg = "Cannot merge a Series without a name"
+        with pytest.raises(ValueError, match=msg):
             result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
                               left_index=left_index, right_index=right_index)