TST: GH30999 Add match=msg to all but two pytest.raises in tests/io (pandas-dev#38724)

moink · luckyvs1 · commit 01c223a4a7e2 · 2021-01-19T23:18:36.000-08:00
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
@@ -43,9 +43,10 @@ def test_read_xlrd_book(read_ext, frame):
 # TODO: test for openpyxl as well
 def test_excel_table_sheet_by_index(datapath, read_ext):
     path = datapath("io", "data", "excel", f"test1{read_ext}")
+    msg = "No sheet named <'invalid_sheet_name'>"
     with ExcelFile(path, engine="xlrd") as excel:
-        with pytest.raises(xlrd.XLRDError):
-            pd.read_excel(excel, sheet_name="asdf")
+        with pytest.raises(xlrd.XLRDError, match=msg):
+            pd.read_excel(excel, sheet_name="invalid_sheet_name")
 
 
 def test_excel_file_warning_with_xlsx_file(datapath):
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
@@ -200,12 +200,13 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so):
             tm.assert_frame_equal(tips_df.iloc[:10], df)
 
     def test_read_s3_fails(self, s3so):
-        with pytest.raises(IOError):
+        msg = "The specified bucket does not exist"
+        with pytest.raises(IOError, match=msg):
             read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)
 
         # Receive a permission error when trying to read a private bucket.
         # It's irrelevant here that this isn't actually a table.
-        with pytest.raises(IOError):
+        with pytest.raises(IOError, match=msg):
             read_csv("s3://cant_get_it/file.csv")
 
     def test_write_s3_csv_fails(self, tips_df, s3so):
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
@@ -249,19 +249,15 @@ def test_multi_char_sep_quotes(python_parser_only, quoting):
     parser = python_parser_only
 
     data = 'a,,b\n1,,a\n2,,"2,,b"'
-    msg = "ignored when a multi-char delimiter is used"
 
-    def fail_read():
+    if quoting == csv.QUOTE_NONE:
+        msg = "Expected 2 fields in line 3, saw 3"
         with pytest.raises(ParserError, match=msg):
             parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
-
-    if quoting == csv.QUOTE_NONE:
-        # We expect no match, so there should be an assertion
-        # error out of the inner context manager.
-        with pytest.raises(AssertionError):
-            fail_read()
     else:
-        fail_read()
+        msg = "ignored when a multi-char delimiter is used"
+        with pytest.raises(ParserError, match=msg):
+            parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
 
 
 def test_none_delimiter(python_parser_only, capsys):
@@ -286,20 +282,15 @@ def test_none_delimiter(python_parser_only, capsys):
 @pytest.mark.parametrize("skipfooter", [0, 1])
 def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
     # see gh-13879 and gh-15910
-    msg = "parsing errors in the skipped footer rows"
     parser = python_parser_only
-
-    def fail_read():
+    if skipfooter:
+        msg = "parsing errors in the skipped footer rows"
         with pytest.raises(ParserError, match=msg):
             parser.read_csv(StringIO(data), skipfooter=skipfooter)
-
-    if skipfooter:
-        fail_read()
     else:
-        # We expect no match, so there should be an assertion
-        # error out of the inner context manager.
-        with pytest.raises(AssertionError):
-            fail_read()
+        msg = "unexpected end of data|expected after"
+        with pytest.raises(ParserError, match=msg):
+            parser.read_csv(StringIO(data), skipfooter=skipfooter)
 
 
 def test_malformed_skipfooter(python_parser_only):
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
@@ -213,15 +213,16 @@ def test_inconsistent_number_of_rows(datapath):
 def test_zero_variables(datapath):
     # Check if the SAS file has zero variables (PR #18184)
     fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")
-    with pytest.raises(EmptyDataError):
+    with pytest.raises(EmptyDataError, match="No columns to parse from file"):
         pd.read_sas(fname)
 
 
 def test_corrupt_read(datapath):
     # We don't really care about the exact failure, the important thing is
     # that the resource should be cleaned up afterwards (BUG #35566)
     fname = datapath("io", "sas", "data", "corrupt.sas7bdat")
-    with pytest.raises(AttributeError):
+    msg = "'SAS7BDATReader' object has no attribute 'row_count'"
+    with pytest.raises(AttributeError, match=msg):
         pd.read_sas(fname)
 
 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -406,7 +406,8 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
                 df.to_csv(path, compression=compression_, encoding=encoding)
 
             # reading should fail (otherwise we wouldn't need the warning)
-            with pytest.raises(Exception):
+            msg = r"UTF-\d+ stream does not start with BOM"
+            with pytest.raises(UnicodeError, match=msg):
                 pd.read_csv(path, compression=compression_, encoding=encoding)
 
 
diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
@@ -21,11 +21,19 @@
 @filter_sparse
 @pytest.mark.single
 class TestFeather:
-    def check_error_on_write(self, df, exc):
+    def check_error_on_write(self, df, exc, err_msg):
         # check that we are raising the exception
         # on writing
 
-        with pytest.raises(exc):
+        with pytest.raises(exc, match=err_msg):
+            with tm.ensure_clean() as path:
+                to_feather(df, path)
+
+    def check_external_error_on_write(self, df):
+        # check that writing raises; the error comes from an external
+        # library, so no message is matched
+
+        with tm.external_error_raised(Exception):
             with tm.ensure_clean() as path:
                 to_feather(df, path)
 
@@ -42,14 +50,15 @@ def check_round_trip(self, df, expected=None, write_kwargs={}, **read_kwargs):
 
     def test_error(self):
 
+        msg = "feather only support IO with DataFrames"
         for obj in [
             pd.Series([1, 2, 3]),
             1,
             "foo",
             pd.Timestamp("20130101"),
             np.array([1, 2, 3]),
         ]:
-            self.check_error_on_write(obj, ValueError)
+            self.check_error_on_write(obj, ValueError, msg)
 
     def test_basic(self):
 
@@ -95,12 +104,13 @@ def test_duplicate_columns(self):
         # https://github.com/wesm/feather/issues/53
         # not currently able to handle duplicate columns
         df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
-        self.check_error_on_write(df, ValueError)
+        self.check_external_error_on_write(df)
 
     def test_stringify_columns(self):
 
         df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
-        self.check_error_on_write(df, ValueError)
+        msg = "feather must have string column names"
+        self.check_error_on_write(df, ValueError, msg)
 
     def test_read_columns(self):
         # GH 24025
@@ -125,8 +135,7 @@ def test_unsupported_other(self):
 
         # mixed python objects
         df = pd.DataFrame({"a": ["a", 1, 2.0]})
-        # Some versions raise ValueError, others raise ArrowInvalid.
-        self.check_error_on_write(df, Exception)
+        self.check_external_error_on_write(df)
 
     def test_rw_use_threads(self):
         df = pd.DataFrame({"A": np.arange(100000)})
@@ -138,6 +147,10 @@ def test_write_with_index(self):
         df = pd.DataFrame({"A": [1, 2, 3]})
         self.check_round_trip(df)
 
+        msg = (
+            r"feather does not support serializing .* for the index; "
+            r"you can \.reset_index\(\) to make the index into column\(s\)"
+        )
         # non-default index
         for index in [
             [2, 3, 4],
@@ -148,17 +161,19 @@ def test_write_with_index(self):
         ]:
 
             df.index = index
-            self.check_error_on_write(df, ValueError)
+            self.check_error_on_write(df, ValueError, msg)
 
         # index with meta-data
         df.index = [0, 1, 2]
         df.index.name = "foo"
-        self.check_error_on_write(df, ValueError)
+        msg = "feather does not serialize index meta-data on a default index"
+        self.check_error_on_write(df, ValueError, msg)
 
         # column multi-index
         df.index = [0, 1, 2]
         df.columns = pd.MultiIndex.from_tuples([("a", 1)])
-        self.check_error_on_write(df, ValueError)
+        msg = "feather must have string column names"
+        self.check_error_on_write(df, ValueError, msg)
 
     def test_path_pathlib(self):
         df = tm.makeDataFrame().reset_index()
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
@@ -51,18 +51,16 @@ def test_reasonable_error(monkeypatch, cleared_fs):
     from fsspec.registry import known_implementations
 
     registry.target.clear()
-    with pytest.raises(ValueError) as e:
+    with pytest.raises(ValueError, match="nosuchprotocol"):
         read_csv("nosuchprotocol://test/test.csv")
-        assert "nosuchprotocol" in str(e.value)
-    err_mgs = "test error messgae"
+    err_msg = "test error message"
     monkeypatch.setitem(
         known_implementations,
         "couldexist",
-        {"class": "unimportable.CouldExist", "err": err_mgs},
+        {"class": "unimportable.CouldExist", "err": err_msg},
     )
-    with pytest.raises(ImportError) as e:
+    with pytest.raises(ImportError, match=err_msg):
         read_csv("couldexist://test/test.csv")
-        assert err_mgs in str(e.value)
 
 
 def test_to_csv(cleared_fs):
@@ -225,9 +223,9 @@ def test_s3_parquet(s3_resource, s3so):
 
 @td.skip_if_installed("fsspec")
 def test_not_present_exception():
-    with pytest.raises(ImportError) as e:
+    msg = "Missing optional dependency 'fsspec'|fsspec library is required"
+    with pytest.raises(ImportError, match=msg):
         read_csv("memory://test/test.csv")
-        assert "fsspec library is required" in str(e.value)
 
 
 @td.skip_if_no("pyarrow")
diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py
@@ -11,6 +11,7 @@
 
 import pandas as pd
 from pandas import DataFrame
+import pandas._testing as tm
 
 api_exceptions = pytest.importorskip("google.api_core.exceptions")
 bigquery = pytest.importorskip("google.cloud.bigquery")
@@ -195,7 +196,7 @@ def test_roundtrip(self, gbq_dataset):
         "if_exists, expected_num_rows, expectation",
         [
             ("append", 300, does_not_raise()),
-            ("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)),
+            ("fail", 200, tm.external_error_raised(pandas_gbq.gbq.TableCreationError)),
             ("replace", 100, does_not_raise()),
         ],
     )
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
@@ -149,6 +149,5 @@ def open(self, path, mode="r", *args):
 
 @td.skip_if_installed("gcsfs")
 def test_gcs_not_present_exception():
-    with pytest.raises(ImportError) as e:
+    with tm.external_error_raised(ImportError):
         read_csv("gs://test/test.csv")
-        assert "gcsfs library is required" in str(e.value)
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
@@ -302,17 +302,18 @@ def test_file_like(self):
 
     @tm.network
     def test_bad_url_protocol(self):
-        with pytest.raises(URLError):
+        with pytest.raises(URLError, match="urlopen error unknown url type: git"):
             self.read_html("git://github.com", match=".*Water.*")
 
     @tm.network
     @pytest.mark.slow
     def test_invalid_url(self):
-        try:
-            with pytest.raises(URLError):
-                self.read_html("http://www.a23950sdfa908sd.com", match=".*Water.*")
-        except ValueError as e:
-            assert "No tables found" in str(e)
+        msg = (
+            "Name or service not known|Temporary failure in name resolution|"
+            "No tables found"
+        )
+        with pytest.raises((URLError, ValueError), match=msg):
+            self.read_html("http://www.a23950sdfa908sd.com", match=".*Water.*")
 
     @pytest.mark.slow
     def test_file_url(self):
@@ -949,8 +950,13 @@ def test_decimal_rows(self):
 
     def test_bool_header_arg(self):
         # GH 6114
+        msg = re.escape(
+            "Passing a bool to header is invalid. Use header=None for no header or "
+            "header=int or list-like of ints to specify the row(s) making up the "
+            "column names"
+        )
         for arg in [True, False]:
-            with pytest.raises(TypeError):
+            with pytest.raises(TypeError, match=msg):
                 self.read_html(self.spam_data, header=arg)
 
     def test_converters(self):
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
diff --git a/pandas/tests/io/test_user_agent.py b/pandas/tests/io/test_user_agent.py

Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@`
`11`	`11`
`12`	`12`	`import pandas as pd`
`13`	`13`	`from pandas import DataFrame`
	`14`	`+import pandas._testing as tm`
`14`	`15`
`15`	`16`	`api_exceptions = pytest.importorskip("google.api_core.exceptions")`
`16`	`17`	`bigquery = pytest.importorskip("google.cloud.bigquery")`
`@@ -195,7 +196,7 @@ def test_roundtrip(self, gbq_dataset):`
`195`	`196`	`"if_exists, expected_num_rows, expectation",`
`196`	`197`	`[`
`197`	`198`	`("append", 300, does_not_raise()),`
`198`		`- ("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)),`
	`199`	`+ ("fail", 200, tm.external_error_raised(pandas_gbq.gbq.TableCreationError)),`
`199`	`200`	`("replace", 100, does_not_raise()),`
`200`	`201`	`],`
`201`	`202`	`)`