pandas-dev · jreback · Jan 9, 2019 · Jan 8, 2019 · Jan 9, 2019 · Jan 9, 2019
diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py
@@ -30,7 +30,9 @@ def test_foobar(self):
         assert unpacker.unpack() == ord(b'b')
         assert unpacker.unpack() == ord(b'a')
         assert unpacker.unpack() == ord(b'r')
-        pytest.raises(OutOfData, unpacker.unpack)
+        msg = "No more data to unpack"
+        with pytest.raises(OutOfData, match=msg):
+            unpacker.unpack()
 
         unpacker.feed(b'foo')
         unpacker.feed(b'bar')
@@ -50,13 +52,18 @@ def test_foobar_skip(self):
         unpacker.skip()
         assert unpacker.unpack() == ord(b'a')
         unpacker.skip()
-        pytest.raises(OutOfData, unpacker.unpack)
+        msg = "No more data to unpack"
+        with pytest.raises(OutOfData, match=msg):
+            unpacker.unpack()
 
     def test_maxbuffersize(self):
-        pytest.raises(ValueError, Unpacker, read_size=5, max_buffer_size=3)
+        msg = "read_size should be less or equal to max_buffer_size"
+        with pytest.raises(ValueError, match=msg):
+            Unpacker(read_size=5, max_buffer_size=3)
         unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1)
         unpacker.feed(b'fo')
-        pytest.raises(BufferFull, unpacker.feed, b'ob')
+        with pytest.raises(BufferFull, match=r'$^'):
+            unpacker.feed(b'ob')
         unpacker.feed(b'o')
         assert ord('f') == next(unpacker)
         unpacker.feed(b'b')

diff --git a/pandas/tests/io/msgpack/test_unpack.py b/pandas/tests/io/msgpack/test_unpack.py
@@ -16,7 +16,9 @@ def test_unpack_array_header_from_file(self):
         assert unpacker.unpack() == 2
         assert unpacker.unpack() == 3
         assert unpacker.unpack() == 4
-        pytest.raises(OutOfData, unpacker.unpack)
+        msg = "No more data to unpack"
+        with pytest.raises(OutOfData, match=msg):
+            unpacker.unpack()
 
     def test_unpacker_hook_refcnt(self):
         if not hasattr(sys, 'getrefcount'):

diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
@@ -145,7 +145,10 @@ def test_skip_bad_lines(self, capsys):
 
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None)
-        pytest.raises(parser.ParserError, reader.read)
+        msg = (r"Error tokenizing data\. C error: Expected 3 fields in"
+               " line 4, saw 4")
+        with pytest.raises(parser.ParserError, match=msg):
+            reader.read()
 
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None,

diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -3,7 +3,6 @@
 """
 import mmap
 import os
-import re
 
 import pytest
 
@@ -146,7 +145,16 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
         pytest.importorskip(module)
 
         path = os.path.join(HERE, 'data', 'does_not_exist.' + fn_ext)
-        with pytest.raises(error_class):
+        msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist"
+                .format(fn_ext))
+        msg2 = (r"\[Errno 2\] No such file or directory: '.+does_not_exist"
+                r"\.{}'").format(fn_ext)
+        msg3 = "Expected object or value"
+        msg4 = "path_or_buf needs to be a string file path or file-like"
+        msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:"
+                r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext)
+        with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format(
+                msg1, msg2, msg3, msg4, msg5)):
             reader(path)
 
     @pytest.mark.parametrize('reader, module, error_class, fn_ext', [
@@ -169,14 +177,26 @@ def test_read_expands_user_home_dir(self, reader, module,
         monkeypatch.setattr(icom, '_expand_user',
                             lambda x: os.path.join('foo', x))
 
-        message = "".join(["foo", os.path.sep, "does_not_exist.", fn_ext])
-
-        with pytest.raises(error_class, message=re.escape(message)):
+        msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist"
+                .format(fn_ext))
+        msg2 = (r"\[Errno 2\] No such file or directory:"
+                r" '.+does_not_exist\.{}'").format(fn_ext)
+        msg3 = "Unexpected character found when decoding 'false'"
+        msg4 = "path_or_buf needs to be a string file path or file-like"
+        msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:"
+                r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext)
+
+        with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format(
+                msg1, msg2, msg3, msg4, msg5)):
             reader(path)
 
     def test_read_non_existant_read_table(self):
         path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv')
-        with pytest.raises(FileNotFoundError):
+        msg1 = r"File b'.+does_not_exist\.csv' does not exist"
+        msg2 = (r"\[Errno 2\] File .+does_not_exist\.csv does not exist:"
+                r" '.+does_not_exist\.csv'")
+        with pytest.raises(FileNotFoundError, match=r"({}|{})".format(
+                msg1, msg2)):
             with tm.assert_produces_warning(FutureWarning):
                 pd.read_table(path)
 
@@ -326,7 +346,8 @@ def test_next(self, mmap_file):
             next_line = next(wrapper)
             assert next_line.strip() == line.strip()
 
-        pytest.raises(StopIteration, next, wrapper)
+        with pytest.raises(StopIteration, match=r'$^'):
+            next(wrapper)
 
     def test_unknown_engine(self):
         with tm.ensure_clean() as path:

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
@@ -156,9 +156,14 @@ class A(object):
             def __init__(self):
                 self.read = 0
 
-        pytest.raises(ValueError, read_msgpack, path_or_buf=None)
-        pytest.raises(ValueError, read_msgpack, path_or_buf={})
-        pytest.raises(ValueError, read_msgpack, path_or_buf=A())
+        msg = (r"Invalid file path or buffer object type: <(class|type)"
+               r" '{}'>")
+        with pytest.raises(ValueError, match=msg.format('NoneType')):
+            read_msgpack(path_or_buf=None)
+        with pytest.raises(ValueError, match=msg.format('dict')):
+            read_msgpack(path_or_buf={})
+        with pytest.raises(ValueError, match=msg.format(r'.*\.A')):
+            read_msgpack(path_or_buf=A())
 
 
 class TestNumpy(TestPackers):
@@ -567,7 +572,9 @@ def _check_roundtrip(self, obj, comparator, **kwargs):
         # currently these are not implemetned
         # i_rec = self.encode_decode(obj)
         # comparator(obj, i_rec, **kwargs)
-        pytest.raises(NotImplementedError, self.encode_decode, obj)
+        msg = r"msgpack sparse (series|frame) is not implemented"
+        with pytest.raises(NotImplementedError, match=msg):
+            self.encode_decode(obj)
 
     def test_sparse_series(self):
 

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -506,7 +506,8 @@ def test_invalid_timestamp(self, version):
         original = DataFrame([(1,)], columns=['variable'])
         time_stamp = '01 Jan 2000, 00:00:00'
         with tm.ensure_clean() as path:
-            with pytest.raises(ValueError):
+            msg = "time_stamp should be datetime type"
+            with pytest.raises(ValueError, match=msg):
                 original.to_stata(path, time_stamp=time_stamp,
                                   version=version)
 
@@ -547,8 +548,8 @@ def test_no_index(self):
         with tm.ensure_clean() as path:
             original.to_stata(path, write_index=False)
             written_and_read_again = self.read_dta(path)
-            pytest.raises(
-                KeyError, lambda: written_and_read_again['index_not_written'])
+            with pytest.raises(KeyError, match=original.index.name):
+                written_and_read_again['index_not_written']
 
     def test_string_no_dates(self):
         s1 = Series(['a', 'A longer string'])
@@ -713,7 +714,11 @@ def test_excessively_long_string(self):
             s['s' + str(str_len)] = Series(['a' * str_len,
                                             'b' * str_len, 'c' * str_len])
         original = DataFrame(s)
-        with pytest.raises(ValueError):
+        msg = (r"Fixed width strings in Stata \.dta files are limited to 244"
+               r" \(or fewer\)\ncharacters\.  Column 's500' does not satisfy"
+               r" this restriction\. Use the\n'version=117' parameter to write"
+               r" the newer \(Stata 13 and later\) format\.")
+        with pytest.raises(ValueError, match=msg):
             with tm.ensure_clean() as path:
                 original.to_stata(path)
 
@@ -864,11 +869,14 @@ def test_drop_column(self):
                                columns=columns)
         tm.assert_frame_equal(expected, reordered)
 
-        with pytest.raises(ValueError):
+        msg = "columns contains duplicate entries"
+        with pytest.raises(ValueError, match=msg):
             columns = ['byte_', 'byte_']
             read_stata(self.dta15_117, convert_dates=True, columns=columns)
 
-        with pytest.raises(ValueError):
+        msg = ("The following columns were not found in the Stata data set:"
+               " not_found")
+        with pytest.raises(ValueError, match=msg):
             columns = ['byte_', 'int_', 'long_', 'not_found']
             read_stata(self.dta15_117, convert_dates=True, columns=columns)
 
@@ -924,7 +932,10 @@ def test_categorical_warnings_and_errors(self):
         original = pd.concat([original[col].astype('category')
                               for col in original], axis=1)
         with tm.ensure_clean() as path:
-            pytest.raises(ValueError, original.to_stata, path)
+            msg = ("Stata value labels for a single variable must have"
+                   r" a combined length less than 32,000 characters\.")
+            with pytest.raises(ValueError, match=msg):
+                original.to_stata(path)
 
         original = pd.DataFrame.from_records(
             [['a'],
@@ -1196,14 +1207,17 @@ def test_invalid_variable_labels(self, version):
                            'b': 'City Exponent',
                            'c': 'City'}
         with tm.ensure_clean() as path:
-            with pytest.raises(ValueError):
+            msg = "Variable labels must be 80 characters or fewer"
+            with pytest.raises(ValueError, match=msg):
                 original.to_stata(path,
                                   variable_labels=variable_labels,
                                   version=version)
 
         variable_labels['a'] = u'invalid character Œ'
         with tm.ensure_clean() as path:
-            with pytest.raises(ValueError):
+            msg = ("Variable labels must contain only characters that can be"
+                   " encoded in Latin-1")
+            with pytest.raises(ValueError, match=msg):
                 original.to_stata(path,
                                   variable_labels=variable_labels,
                                   version=version)
@@ -1221,7 +1235,9 @@ def test_write_variable_label_errors(self):
                                 'b': 'City Exponent',
                                 'c': u''.join(values)}
 
-        with pytest.raises(ValueError):
+        msg = ("Variable labels must contain only characters that can be"
+               " encoded in Latin-1")
+        with pytest.raises(ValueError, match=msg):
             with tm.ensure_clean() as path:
                 original.to_stata(path, variable_labels=variable_labels_utf8)
 
@@ -1231,7 +1247,8 @@ def test_write_variable_label_errors(self):
                                      'that is too long for Stata which means '
                                      'that it has more than 80 characters'}
 
-        with pytest.raises(ValueError):
+        msg = "Variable labels must be 80 characters or fewer"
+        with pytest.raises(ValueError, match=msg):
             with tm.ensure_clean() as path:
                 original.to_stata(path, variable_labels=variable_labels_long)
 
@@ -1265,7 +1282,8 @@ def test_default_date_conversion(self):
     def test_unsupported_type(self):
         original = pd.DataFrame({'a': [1 + 2j, 2 + 4j]})
 
-        with pytest.raises(NotImplementedError):
+        msg = "Data type complex128 not supported"
+        with pytest.raises(NotImplementedError, match=msg):
             with tm.ensure_clean() as path:
                 original.to_stata(path)
 
@@ -1277,7 +1295,8 @@ def test_unsupported_datetype(self):
                                  'strs': ['apple', 'banana', 'cherry'],
                                  'dates': dates})
 
-        with pytest.raises(NotImplementedError):
+        msg = "Format %tC not implemented"
+        with pytest.raises(NotImplementedError, match=msg):
             with tm.ensure_clean() as path:
                 original.to_stata(path, convert_dates={'dates': 'tC'})
 
@@ -1291,9 +1310,10 @@ def test_unsupported_datetype(self):
 
     def test_repeated_column_labels(self):
         # GH 13923
-        with pytest.raises(ValueError) as cm:
+        msg = (r"Value labels for column ethnicsn are not unique\. The"
+               r" repeated labels are:\n\n-+wolof")
+        with pytest.raises(ValueError, match=msg):
             read_stata(self.dta23, convert_categoricals=True)
-            assert 'wolof' in cm.exception
 
     def test_stata_111(self):
         # 111 is an old version but still used by current versions of
@@ -1316,17 +1336,18 @@ def test_out_of_range_double(self):
                         'ColumnTooBig': [0.0,
                                          np.finfo(np.double).eps,
                                          np.finfo(np.double).max]})
-        with pytest.raises(ValueError) as cm:
+        msg = (r"Column ColumnTooBig has a maximum value \(.+\)"
+               r" outside the range supported by Stata \(.+\)")
+        with pytest.raises(ValueError, match=msg):
             with tm.ensure_clean() as path:
                 df.to_stata(path)
-            assert 'ColumnTooBig' in cm.exception
 
         df.loc[2, 'ColumnTooBig'] = np.inf
-        with pytest.raises(ValueError) as cm:
+        msg = ("Column ColumnTooBig has a maximum value of infinity which"
+               " is outside the range supported by Stata")
+        with pytest.raises(ValueError, match=msg):
             with tm.ensure_clean() as path:
                 df.to_stata(path)
-            assert 'ColumnTooBig' in cm.exception
-            assert 'infinity' in cm.exception
 
     def test_out_of_range_float(self):
         original = DataFrame({'ColumnOk': [0.0,
@@ -1348,11 +1369,11 @@ def test_out_of_range_float(self):
                                   reread.set_index('index'))
 
         original.loc[2, 'ColumnTooBig'] = np.inf
-        with pytest.raises(ValueError) as cm:
+        msg = ("Column ColumnTooBig has a maximum value of infinity which"
+               " is outside the range supported by Stata")
+        with pytest.raises(ValueError, match=msg):
             with tm.ensure_clean() as path:
                 original.to_stata(path)
-            assert 'ColumnTooBig' in cm.exception
-            assert 'infinity' in cm.exception
 
     def test_path_pathlib(self):
         df = tm.makeDataFrame()
@@ -1466,7 +1487,8 @@ def test_invalid_date_conversion(self):
                                  'dates': dates})
 
         with tm.ensure_clean() as path:
-            with pytest.raises(ValueError):
+            msg = "convert_dates key must be a column or an integer"
+            with pytest.raises(ValueError, match=msg):
                 original.to_stata(path,
                                   convert_dates={'wrong_name': 'tc'})
 
@@ -1546,18 +1568,27 @@ def test_all_none_exception(self, version):
         output = pd.DataFrame(output)
         output.loc[:, 'none'] = None
         with tm.ensure_clean() as path:
-            with pytest.raises(ValueError) as excinfo:
+            msg = (r"Column `none` cannot be exported\.\n\n"
+                   "Only string-like object arrays containing all strings or a"
+                   r" mix of strings and None can be exported\. Object arrays"
+                   r" containing only null values are prohibited\. Other"
+                   " object typescannot be exported and must first be"
+                   r" converted to one of the supported types\.")
+            with pytest.raises(ValueError, match=msg):
                 output.to_stata(path, version=version)
-        assert 'Only string-like' in excinfo.value.args[0]
-        assert 'Column `none`' in excinfo.value.args[0]
 
     @pytest.mark.parametrize('version', [114, 117])
     def test_invalid_file_not_written(self, version):
         content = 'Here is one __�__ Another one __·__ Another one __½__'
         df = DataFrame([content], columns=['invalid'])
         expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError
         with tm.ensure_clean() as path:
-            with pytest.raises(expected_exc):
+            msg1 = (r"'latin-1' codec can't encode character '\\ufffd'"
+                    r" in position 14: ordinal not in range\(256\)")
+            msg2 = ("'ascii' codec can't decode byte 0xef in position 14:"
+                    r" ordinal not in range\(128\)")
+            with pytest.raises(expected_exc, match=r'{}|{}'.format(
+                    msg1, msg2)):
                 with tm.assert_produces_warning(ResourceWarning):
                     df.to_stata(path)