TST: Scope pytest.raises closer to failing line (pandas-dev#56746)

mroeschke · pmhatre1 · commit b72f90bb9a3b · 2024-05-06T23:10:21.000-07:00
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -141,8 +141,8 @@ class TestEval:
     def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser):
         if parser == "python" and binop in ["and", "or"]:
             msg = "'BoolOp' nodes are not implemented"
+            ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)"
             with pytest.raises(NotImplementedError, match=msg):
-                ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)"
                 pd.eval(ex, engine=engine, parser=parser)
             return
 
@@ -161,9 +161,8 @@ def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser):
 
         if parser == "python" and cmp_op in ["in", "not in"]:
             msg = "'(In|NotIn)' nodes are not implemented"
-
+            ex = f"lhs {cmp_op} rhs"
             with pytest.raises(NotImplementedError, match=msg):
-                ex = f"lhs {cmp_op} rhs"
                 pd.eval(ex, engine=engine, parser=parser)
             return
 
@@ -193,8 +192,8 @@ def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser):
     def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser):
         if parser == "python" and op in ["in", "not in"]:
             msg = "'(In|NotIn)' nodes are not implemented"
+            ex = f"~(lhs {op} rhs)"
             with pytest.raises(NotImplementedError, match=msg):
-                ex = f"~(lhs {op} rhs)"
                 pd.eval(ex, engine=engine, parser=parser)
             return
 
diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py
@@ -168,25 +168,25 @@ def test_compare_multi_index(align_axis):
     tm.assert_frame_equal(result, expected)
 
 
-def test_compare_unaligned_objects():
-    # test DataFrames with different indices
+def test_compare_different_indices():
     msg = (
         r"Can only compare identically-labeled \(both index and columns\) DataFrame "
         "objects"
     )
+    df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"])
+    df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"])
     with pytest.raises(ValueError, match=msg):
-        df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"])
-        df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"])
         df1.compare(df2)
 
-    # test DataFrames with different shapes
+
+def test_compare_different_shapes():
     msg = (
         r"Can only compare identically-labeled \(both index and columns\) DataFrame "
         "objects"
     )
+    df1 = pd.DataFrame(np.ones((3, 3)))
+    df2 = pd.DataFrame(np.zeros((2, 1)))
     with pytest.raises(ValueError, match=msg):
-        df1 = pd.DataFrame(np.ones((3, 3)))
-        df2 = pd.DataFrame(np.zeros((2, 1)))
         df1.compare(df2)
 
 
diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py
@@ -111,12 +111,10 @@ def test_sample_invalid_weight_lengths(self, obj):
             obj.sample(n=3, weights=[0, 1])
 
         with pytest.raises(ValueError, match=msg):
-            bad_weights = [0.5] * 11
-            obj.sample(n=3, weights=bad_weights)
+            obj.sample(n=3, weights=[0.5] * 11)
 
         with pytest.raises(ValueError, match="Fewer non-zero entries in p than size"):
-            bad_weight_series = Series([0, 0, 0.2])
-            obj.sample(n=4, weights=bad_weight_series)
+            obj.sample(n=4, weights=Series([0, 0, 0.2]))
 
     def test_sample_negative_weights(self, obj):
         # Check won't accept negative weights
diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py
@@ -98,21 +98,23 @@ def test_tz_convert_and_localize(self, fn):
             tm.assert_index_equal(df3.index.levels[1], l1_expected)
             assert not df3.index.levels[1].equals(l1)
 
-        # Bad Inputs
-
+    @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"])
+    def test_tz_convert_and_localize_bad_input(self, fn):
+        int_idx = Index(range(5))
+        l0 = date_range("20140701", periods=5, freq="D")
         # Not DatetimeIndex / PeriodIndex
+        df = DataFrame(index=int_idx)
         with pytest.raises(TypeError, match="DatetimeIndex"):
-            df = DataFrame(index=int_idx)
             getattr(df, fn)("US/Pacific")
 
         # Not DatetimeIndex / PeriodIndex
+        df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
         with pytest.raises(TypeError, match="DatetimeIndex"):
-            df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
             getattr(df, fn)("US/Pacific", level=0)
 
         # Invalid level
+        df = DataFrame(index=l0)
         with pytest.raises(ValueError, match="not valid"):
-            df = DataFrame(index=l0)
             getattr(df, fn)("US/Pacific", level=1)
 
     @pytest.mark.parametrize("copy", [True, False])
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -466,8 +466,8 @@ def numpystd(x):
 
     # this uses column selection & renaming
     msg = r"nested renamer is not supported"
+    d = {"C": "mean", "D": {"foo": "mean", "bar": "std"}}
     with pytest.raises(SpecificationError, match=msg):
-        d = {"C": "mean", "D": {"foo": "mean", "bar": "std"}}
         grouped.aggregate(d)
 
     # But without renaming, these functions are OK
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -450,8 +450,8 @@ def test_getitem_with_datestring_with_UTC_offset(self, start, end):
         with pytest.raises(ValueError, match="Both dates must"):
             df[start : end[:-4] + "1:00"]
 
+        df = df.tz_localize(None)
         with pytest.raises(ValueError, match="The index must be timezone"):
-            df = df.tz_localize(None)
             df[start:end]
 
     def test_slice_reduce_to_series(self):
diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py
@@ -54,7 +54,6 @@ def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
         msg = "iLocation based boolean indexing cannot use an indexable as a mask"
         with pytest.raises(ValueError, match=msg):
             result = obj.iloc[mask]
-            tm.assert_equal(result, expected)
     else:
         result = obj.iloc[mask]
         tm.assert_equal(result, expected)
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
@@ -53,8 +53,8 @@ def test_iat_set_ints(self, dtype, frame_or_series):
     def test_iat_set_other(self, index, frame_or_series):
         f = frame_or_series(range(len(index)), index=index)
         msg = "iAt based indexing can only have integer indexers"
+        idx = next(generate_indices(f, False))
         with pytest.raises(ValueError, match=msg):
-            idx = next(generate_indices(f, False))
             f.iat[idx] = 1
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -381,8 +381,8 @@ def test_duplicate_ref_loc_failure(self):
 
         msg = "Gaps in blk ref_locs"
 
+        mgr = BlockManager(blocks, axes)
         with pytest.raises(AssertionError, match=msg):
-            mgr = BlockManager(blocks, axes)
             mgr._rebuild_blknos_and_blklocs()
 
         blocks[0].mgr_locs = BlockPlacement(np.array([0]))
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -269,8 +269,8 @@ def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
     # GH 40230
     df = DataFrame({"fruit": ["pear"]})
     with tm.ensure_clean(ext) as f:
+        df.to_excel(f, sheet_name="foo", engine="openpyxl")
         with pytest.raises(ValueError, match=re.escape(msg)):
-            df.to_excel(f, sheet_name="foo", engine="openpyxl")
             with ExcelWriter(
                 f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
             ) as writer:
diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py
@@ -125,8 +125,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
     expected.index = expected.index.astype(object)
 
     with pytest.raises(ValueError, match="Duplicate names"):
-        data = ""
-        parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"})
+        parser.read_csv(StringIO(""), names=["one", "one"], dtype={0: "u1", 1: "f"})
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
@@ -32,8 +32,7 @@ def test_read_with_bad_header(all_parsers):
     msg = r"but only \d+ lines in file"
 
     with pytest.raises(ValueError, match=msg):
-        s = StringIO(",,")
-        parser.read_csv(s, header=[10])
+        parser.read_csv(StringIO(",,"), header=[10])
 
 
 def test_negative_header(all_parsers):
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
@@ -309,13 +309,13 @@ def test_write_explicit(self, compression, get_random_path):
 
     @pytest.mark.parametrize("compression", ["", "None", "bad", "7z"])
     def test_write_explicit_bad(self, compression, get_random_path):
-        with pytest.raises(ValueError, match="Unrecognized compression type"):
-            with tm.ensure_clean(get_random_path) as path:
-                df = DataFrame(
-                    1.1 * np.arange(120).reshape((30, 4)),
-                    columns=Index(list("ABCD"), dtype=object),
-                    index=Index([f"i-{i}" for i in range(30)], dtype=object),
-                )
+        df = DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=Index(list("ABCD"), dtype=object),
+            index=Index([f"i-{i}" for i in range(30)], dtype=object),
+        )
+        with tm.ensure_clean(get_random_path) as path:
+            with pytest.raises(ValueError, match="Unrecognized compression type"):
                 df.to_pickle(path, compression=compression)
 
     def test_write_infer(self, compression_ext, get_random_path):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -957,20 +957,18 @@ def test_drop_column(self, datapath):
 
         msg = "columns contains duplicate entries"
         with pytest.raises(ValueError, match=msg):
-            columns = ["byte_", "byte_"]
             read_stata(
                 datapath("io", "data", "stata", "stata6_117.dta"),
                 convert_dates=True,
-                columns=columns,
+                columns=["byte_", "byte_"],
             )
 
         msg = "The following columns were not found in the Stata data set: not_found"
         with pytest.raises(ValueError, match=msg):
-            columns = ["byte_", "int_", "long_", "not_found"]
             read_stata(
                 datapath("io", "data", "stata", "stata6_117.dta"),
                 convert_dates=True,
-                columns=columns,
+                columns=["byte_", "int_", "long_", "not_found"],
             )
 
     @pytest.mark.parametrize("version", [114, 117, 118, 119, None])
@@ -2196,16 +2194,16 @@ def test_non_categorical_value_labels():
             assert reader_value_labels == expected
 
         msg = "Can't create value labels for notY, it wasn't found in the dataset."
+        value_labels = {"notY": {7: "label1", 8: "label2"}}
         with pytest.raises(KeyError, match=msg):
-            value_labels = {"notY": {7: "label1", 8: "label2"}}
             StataWriter(path, data, value_labels=value_labels)
 
         msg = (
             "Can't create value labels for Z, value labels "
             "can only be applied to numeric columns."
         )
+        value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}}
         with pytest.raises(ValueError, match=msg):
-            value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}}
             StataWriter(path, data, value_labels=value_labels)
 
 
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
@@ -471,20 +471,22 @@ def test_empty_string_lxml(val):
             r"None \(line 0\)",
         ]
     )
+    if isinstance(val, str):
+        data = StringIO(val)
+    else:
+        data = BytesIO(val)
     with pytest.raises(lxml_etree.XMLSyntaxError, match=msg):
-        if isinstance(val, str):
-            read_xml(StringIO(val), parser="lxml")
-        else:
-            read_xml(BytesIO(val), parser="lxml")
+        read_xml(data, parser="lxml")
 
 
 @pytest.mark.parametrize("val", ["", b""])
 def test_empty_string_etree(val):
+    if isinstance(val, str):
+        data = StringIO(val)
+    else:
+        data = BytesIO(val)
     with pytest.raises(ParseError, match="no element found"):
-        if isinstance(val, str):
-            read_xml(StringIO(val), parser="etree")
-        else:
-            read_xml(BytesIO(val), parser="etree")
+        read_xml(data, parser="etree")
 
 
 def test_wrong_file_path(parser):
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
@@ -663,8 +663,8 @@ def test_grouped_box_multiple_axes_ax_error(self, hist_df):
         # GH 6970, GH 7069
         df = hist_df
         msg = "The number of passed axes must be 3, the same as the output plot"
+        _, axes = mpl.pyplot.subplots(2, 3)
         with pytest.raises(ValueError, match=msg):
-            fig, axes = mpl.pyplot.subplots(2, 3)
             # pass different number of axes from required
             with tm.assert_produces_warning(UserWarning):
                 axes = df.groupby("classroom").boxplot(ax=axes)
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
@@ -655,8 +655,8 @@ def test_hist_with_nans_and_weights(self):
         idxerror_weights = np.array([[0.3, 0.25], [0.45, 0.45]])
 
         msg = "weights must have the same shape as data, or be a single column"
+        _, ax2 = mpl.pyplot.subplots()
         with pytest.raises(ValueError, match=msg):
-            _, ax2 = mpl.pyplot.subplots()
             no_nan_df.plot.hist(ax=ax2, weights=idxerror_weights)
 
 
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
@@ -72,8 +72,8 @@ def test_exceed_product_space(self):
         # GH31355: raise useful error when produce space is too large
         msg = "Product space too large to allocate arrays!"
 
+        dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [
+            (np.arange(15128, dtype=np.int16)),
+        ]
         with pytest.raises(ValueError, match=msg):
-            dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [
-                (np.arange(15128, dtype=np.int16)),
-            ]
             cartesian_product(X=dims)
diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py
@@ -70,6 +70,6 @@ def test_between_error_args(self, inclusive):
             "'left', 'right', or 'neither'."
         )
 
+        series = Series(date_range("1/1/2000", periods=10))
         with pytest.raises(ValueError, match=value_error_msg):
-            series = Series(date_range("1/1/2000", periods=10))
             series.between(left, right, inclusive=inclusive)
diff --git a/pandas/tests/series/methods/test_compare.py b/pandas/tests/series/methods/test_compare.py
@@ -99,19 +99,19 @@ def test_compare_multi_index():
     tm.assert_series_equal(result, expected)
 
 
-def test_compare_unaligned_objects():
-    # test Series with different indices
+def test_compare_different_indices():
     msg = "Can only compare identically-labeled Series objects"
+    ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
+    ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
     with pytest.raises(ValueError, match=msg):
-        ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
-        ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
         ser1.compare(ser2)
 
-    # test Series with different lengths
+
+def test_compare_different_lengths():
     msg = "Can only compare identically-labeled Series objects"
+    ser1 = pd.Series([1, 2, 3])
+    ser2 = pd.Series([1, 2, 3, 4])
     with pytest.raises(ValueError, match=msg):
-        ser1 = pd.Series([1, 2, 3])
-        ser2 = pd.Series([1, 2, 3, 4])
         ser1.compare(ser2)
 
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -2254,12 +2254,12 @@ def test_dataframe_coerce(self, cache):
         expected = Series([Timestamp("20150204 00:00:00"), NaT])
         tm.assert_series_equal(result, expected)
 
-    def test_dataframe_extra_keys_raisesm(self, df, cache):
+    def test_dataframe_extra_keys_raises(self, df, cache):
         # extra columns
         msg = r"extra keys have been passed to the datetime assemblage: \[foo\]"
+        df2 = df.copy()
+        df2["foo"] = 1
         with pytest.raises(ValueError, match=msg):
-            df2 = df.copy()
-            df2["foo"] = 1
             to_datetime(df2, cache=cache)
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py
@@ -157,13 +157,13 @@ def test_rolling_forward_window(
     indexer = FixedForwardWindowIndexer(window_size=3)
 
     match = "Forward-looking windows can't have center=True"
+    rolling = frame_or_series(values).rolling(window=indexer, center=True)
     with pytest.raises(ValueError, match=match):
-        rolling = frame_or_series(values).rolling(window=indexer, center=True)
         getattr(rolling, func)()
 
     match = "Forward-looking windows don't support setting the closed argument"
+    rolling = frame_or_series(values).rolling(window=indexer, closed="right")
     with pytest.raises(ValueError, match=match):
-        rolling = frame_or_series(values).rolling(window=indexer, closed="right")
         getattr(rolling, func)()
 
     rolling = frame_or_series(values).rolling(window=indexer, min_periods=2, step=step)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -70,6 +70,6 @@ def test_between_error_args(self, inclusive):` |
| `70` | `70` | `"'left', 'right', or 'neither'."` |
| `71` | `71` | `)` |
| `72` | `72` |  |
|  | `73` | `+ series = Series(date_range("1/1/2000", periods=10))` |
| `73` | `74` | `with pytest.raises(ValueError, match=value_error_msg):` |
| `74` |  | `- series = Series(date_range("1/1/2000", periods=10))` |
| `75` | `75` | `series.between(left, right, inclusive=inclusive)` |