From 9c823a5b9d4b72d25b80c6dc17199b92f484354e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 5 Jan 2024 10:51:03 -0800 Subject: [PATCH] TST: Scope pytest.raises closer to failing line --- pandas/tests/computation/test_eval.py | 7 +++---- pandas/tests/frame/methods/test_compare.py | 14 +++++++------- pandas/tests/frame/methods/test_sample.py | 6 ++---- pandas/tests/frame/methods/test_tz_convert.py | 12 +++++++----- .../tests/groupby/aggregate/test_aggregate.py | 2 +- .../indexes/datetimes/test_partial_slicing.py | 2 +- pandas/tests/indexing/test_na_indexing.py | 1 - pandas/tests/indexing/test_scalar.py | 2 +- pandas/tests/internals/test_internals.py | 2 +- pandas/tests/io/excel/test_openpyxl.py | 2 +- pandas/tests/io/parser/dtypes/test_empty.py | 3 +-- pandas/tests/io/parser/test_header.py | 3 +-- pandas/tests/io/test_pickle.py | 14 +++++++------- pandas/tests/io/test_stata.py | 10 ++++------ pandas/tests/io/xml/test_xml.py | 18 ++++++++++-------- pandas/tests/plotting/test_boxplot_method.py | 2 +- pandas/tests/plotting/test_hist_method.py | 2 +- pandas/tests/reshape/test_util.py | 6 +++--- pandas/tests/series/methods/test_between.py | 2 +- pandas/tests/series/methods/test_compare.py | 14 +++++++------- pandas/tests/tools/test_to_datetime.py | 6 +++--- pandas/tests/window/test_base_indexer.py | 4 ++-- 22 files changed, 65 insertions(+), 69 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index ed3ea1b0bd0dc..7969e684f5b04 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -141,8 +141,8 @@ class TestEval: def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser): if parser == "python" and binop in ["and", "or"]: msg = "'BoolOp' nodes are not implemented" + ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" with pytest.raises(NotImplementedError, match=msg): - ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" pd.eval(ex, engine=engine, parser=parser) return @@ -161,9 +161,8 @@ def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser): if parser == "python" and cmp_op in ["in", "not in"]: msg = "'(In|NotIn)' nodes are not implemented" - + ex = f"lhs {cmp_op} rhs" with pytest.raises(NotImplementedError, match=msg): - ex = f"lhs {cmp_op} rhs" pd.eval(ex, engine=engine, parser=parser) return @@ -193,8 +192,8 @@ def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser): def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser): if parser == "python" and op in ["in", "not in"]: msg = "'(In|NotIn)' nodes are not implemented" + ex = f"~(lhs {op} rhs)" with pytest.raises(NotImplementedError, match=msg): - ex = f"~(lhs {op} rhs)" pd.eval(ex, engine=engine, parser=parser) return diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index a4d0a7068a3a6..75e60a4816902 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -168,25 +168,25 @@ def test_compare_multi_index(align_axis): tm.assert_frame_equal(result, expected) -def test_compare_unaligned_objects(): - # test DataFrames with different indices +def test_compare_different_indices(): msg = ( r"Can only compare identically-labeled \(both index and columns\) DataFrame " "objects" ) + df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"]) + df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"]) with pytest.raises(ValueError, match=msg): - df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"]) - df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"]) df1.compare(df2) - # test DataFrames with different shapes + +def test_compare_different_shapes(): msg = ( r"Can only compare identically-labeled \(both index and columns\) DataFrame " "objects" ) + df1 = pd.DataFrame(np.ones((3, 3))) + df2 = pd.DataFrame(np.zeros((2, 1))) with pytest.raises(ValueError, match=msg): - df1 = pd.DataFrame(np.ones((3, 3))) - df2 = pd.DataFrame(np.zeros((2, 1))) df1.compare(df2) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 6b3459fbdc035..e65225a33a479 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -111,12 +111,10 @@ def test_sample_invalid_weight_lengths(self, obj): obj.sample(n=3, weights=[0, 1]) with pytest.raises(ValueError, match=msg): - bad_weights = [0.5] * 11 - obj.sample(n=3, weights=bad_weights) + obj.sample(n=3, weights=[0.5] * 11) with pytest.raises(ValueError, match="Fewer non-zero entries in p than size"): - bad_weight_series = Series([0, 0, 0.2]) - obj.sample(n=4, weights=bad_weight_series) + obj.sample(n=4, weights=Series([0, 0, 0.2])) def test_sample_negative_weights(self, obj): # Check won't accept negative weights diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py index bcb8e423980fd..90bec4dfb5be6 100644 --- a/pandas/tests/frame/methods/test_tz_convert.py +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -98,21 +98,23 @@ def test_tz_convert_and_localize(self, fn): tm.assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) - # Bad Inputs - + @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) + def test_tz_convert_and_localize_bad_input(self, fn): + int_idx = Index(range(5)) + l0 = date_range("20140701", periods=5, freq="D") # Not DatetimeIndex / PeriodIndex + df = DataFrame(index=int_idx) with pytest.raises(TypeError, match="DatetimeIndex"): - df = DataFrame(index=int_idx) getattr(df, fn)("US/Pacific") # Not DatetimeIndex / PeriodIndex + df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) with pytest.raises(TypeError, match="DatetimeIndex"): - df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) getattr(df, fn)("US/Pacific", level=0) # Invalid level + df = DataFrame(index=l0) with pytest.raises(ValueError, match="not valid"): - df = DataFrame(index=l0) getattr(df, fn)("US/Pacific", level=1) @pytest.mark.parametrize("copy", [True, False]) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5a69c26f2ab16..8198cc532d998 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -466,8 +466,8 @@ def numpystd(x): # this uses column selection & renaming msg = r"nested renamer is not supported" + d = {"C": "mean", "D": {"foo": "mean", "bar": "std"}} with pytest.raises(SpecificationError, match=msg): - d = {"C": "mean", "D": {"foo": "mean", "bar": "std"}} grouped.aggregate(d) # But without renaming, these functions are OK diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 0ebb88afb6c86..18a5d7db3753e 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -450,8 +450,8 @@ def test_getitem_with_datestring_with_UTC_offset(self, start, end): with pytest.raises(ValueError, match="Both dates must"): df[start : end[:-4] + "1:00"] + df = df.tz_localize(None) with pytest.raises(ValueError, match="The index must be timezone"): - df = df.tz_localize(None) df[start:end] def test_slice_reduce_to_series(self): diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index 5364cfe852430..d4ad350a64e4d 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -54,7 +54,6 @@ def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): msg = "iLocation based boolean indexing cannot use an indexable as a mask" with pytest.raises(ValueError, match=msg): result = obj.iloc[mask] - tm.assert_equal(result, expected) else: result = obj.iloc[mask] tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index ef4cd402aaf24..a51334c03a302 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -53,8 +53,8 @@ def test_iat_set_ints(self, dtype, frame_or_series): def test_iat_set_other(self, index, frame_or_series): f = frame_or_series(range(len(index)), index=index) msg = "iAt based indexing can only have integer indexers" + idx = next(generate_indices(f, False)) with pytest.raises(ValueError, match=msg): - idx = next(generate_indices(f, False)) f.iat[idx] = 1 @pytest.mark.parametrize( diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 66dd893df51de..c9708bfea7106 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -381,8 +381,8 @@ def test_duplicate_ref_loc_failure(self): msg = "Gaps in blk ref_locs" + mgr = BlockManager(blocks, axes) with pytest.raises(AssertionError, match=msg): - mgr = BlockManager(blocks, axes) mgr._rebuild_blknos_and_blklocs() blocks[0].mgr_locs = BlockPlacement(np.array([0])) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 2df9ec9e53516..c20c6daf92931 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -269,8 +269,8 @@ def test_if_sheet_exists_raises(ext, if_sheet_exists, msg): # GH 40230 df = DataFrame({"fruit": ["pear"]}) with tm.ensure_clean(ext) as f: + df.to_excel(f, sheet_name="foo", engine="openpyxl") with pytest.raises(ValueError, match=re.escape(msg)): - df.to_excel(f, sheet_name="foo", engine="openpyxl") with ExcelWriter( f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists ) as writer: diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index f34385b190c5f..609c4cbe77fc8 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -125,8 +125,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): expected.index = expected.index.astype(object) with pytest.raises(ValueError, match="Duplicate names"): - data = "" - parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"}) + parser.read_csv(StringIO(""), names=["one", "one"], dtype={0: "u1", 1: "f"}) @pytest.mark.parametrize( diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 0dbd4e3569ad6..d185e83bfc027 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -32,8 +32,7 @@ def test_read_with_bad_header(all_parsers): msg = r"but only \d+ lines in file" with pytest.raises(ValueError, match=msg): - s = StringIO(",,") - parser.read_csv(s, header=[10]) + parser.read_csv(StringIO(",,"), header=[10]) def test_negative_header(all_parsers): diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 4e1f09b929224..f6c7f66abe5d3 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -309,13 +309,13 @@ def test_write_explicit(self, compression, get_random_path): @pytest.mark.parametrize("compression", ["", "None", "bad", "7z"]) def test_write_explicit_bad(self, compression, get_random_path): - with pytest.raises(ValueError, match="Unrecognized compression type"): - with tm.ensure_clean(get_random_path) as path: - df = DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=Index(list("ABCD"), dtype=object), - index=Index([f"i-{i}" for i in range(30)], dtype=object), - ) + df = DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=Index(list("ABCD"), dtype=object), + index=Index([f"i-{i}" for i in range(30)], dtype=object), + ) + with tm.ensure_clean(get_random_path) as path: + with pytest.raises(ValueError, match="Unrecognized compression type"): df.to_pickle(path, compression=compression) def test_write_infer(self, compression_ext, get_random_path): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 3e4e1a107da9d..799b0a63feb53 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -957,20 +957,18 @@ def test_drop_column(self, datapath): msg = "columns contains duplicate entries" with pytest.raises(ValueError, match=msg): - columns = ["byte_", "byte_"] read_stata( datapath("io", "data", "stata", "stata6_117.dta"), convert_dates=True, - columns=columns, + columns=["byte_", "byte_"], ) msg = "The following columns were not found in the Stata data set: not_found" with pytest.raises(ValueError, match=msg): - columns = ["byte_", "int_", "long_", "not_found"] read_stata( datapath("io", "data", "stata", "stata6_117.dta"), convert_dates=True, - columns=columns, + columns=["byte_", "int_", "long_", "not_found"], ) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @@ -2196,16 +2194,16 @@ def test_non_categorical_value_labels(): assert reader_value_labels == expected msg = "Can't create value labels for notY, it wasn't found in the dataset." + value_labels = {"notY": {7: "label1", 8: "label2"}} with pytest.raises(KeyError, match=msg): - value_labels = {"notY": {7: "label1", 8: "label2"}} StataWriter(path, data, value_labels=value_labels) msg = ( "Can't create value labels for Z, value labels " "can only be applied to numeric columns." ) + value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}} with pytest.raises(ValueError, match=msg): - value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}} StataWriter(path, data, value_labels=value_labels) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 6f429c1ecbf8a..5451f7b2f16f5 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -471,20 +471,22 @@ def test_empty_string_lxml(val): r"None \(line 0\)", ] ) + if isinstance(val, str): + data = StringIO(val) + else: + data = BytesIO(val) with pytest.raises(lxml_etree.XMLSyntaxError, match=msg): - if isinstance(val, str): - read_xml(StringIO(val), parser="lxml") - else: - read_xml(BytesIO(val), parser="lxml") + read_xml(data, parser="lxml") @pytest.mark.parametrize("val", ["", b""]) def test_empty_string_etree(val): + if isinstance(val, str): + data = StringIO(val) + else: + data = BytesIO(val) with pytest.raises(ParseError, match="no element found"): - if isinstance(val, str): - read_xml(StringIO(val), parser="etree") - else: - read_xml(BytesIO(val), parser="etree") + read_xml(data, parser="etree") def test_wrong_file_path(parser): diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 76f7fa1f22eec..2470aae78d701 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -663,8 +663,8 @@ def test_grouped_box_multiple_axes_ax_error(self, hist_df): # GH 6970, GH 7069 df = hist_df msg = "The number of passed axes must be 3, the same as the output plot" + _, axes = mpl.pyplot.subplots(2, 3) with pytest.raises(ValueError, match=msg): - fig, axes = mpl.pyplot.subplots(2, 3) # pass different number of axes from required with tm.assert_produces_warning(UserWarning): axes = df.groupby("classroom").boxplot(ax=axes) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 4d17f87fdc7bc..0318abe7bdfac 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -655,8 +655,8 @@ def test_hist_with_nans_and_weights(self): idxerror_weights = np.array([[0.3, 0.25], [0.45, 0.45]]) msg = "weights must have the same shape as data, or be a single column" + _, ax2 = mpl.pyplot.subplots() with pytest.raises(ValueError, match=msg): - _, ax2 = mpl.pyplot.subplots() no_nan_df.plot.hist(ax=ax2, weights=idxerror_weights) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 4d0be7464cb3d..d2971db3d7aa2 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -72,8 +72,8 @@ def test_exceed_product_space(self): # GH31355: raise useful error when produce space is too large msg = "Product space too large to allocate arrays!" + dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [ + (np.arange(15128, dtype=np.int16)), + ] with pytest.raises(ValueError, match=msg): - dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [ - (np.arange(15128, dtype=np.int16)), - ] cartesian_product(X=dims) diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py index 3913419038876..e67eafbd118ce 100644 --- a/pandas/tests/series/methods/test_between.py +++ b/pandas/tests/series/methods/test_between.py @@ -70,6 +70,6 @@ def test_between_error_args(self, inclusive): "'left', 'right', or 'neither'." ) + series = Series(date_range("1/1/2000", periods=10)) with pytest.raises(ValueError, match=value_error_msg): - series = Series(date_range("1/1/2000", periods=10)) series.between(left, right, inclusive=inclusive) diff --git a/pandas/tests/series/methods/test_compare.py b/pandas/tests/series/methods/test_compare.py index 304045e46702b..2a57d5139b62c 100644 --- a/pandas/tests/series/methods/test_compare.py +++ b/pandas/tests/series/methods/test_compare.py @@ -99,19 +99,19 @@ def test_compare_multi_index(): tm.assert_series_equal(result, expected) -def test_compare_unaligned_objects(): - # test Series with different indices +def test_compare_different_indices(): msg = "Can only compare identically-labeled Series objects" + ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) + ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"]) with pytest.raises(ValueError, match=msg): - ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) - ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"]) ser1.compare(ser2) - # test Series with different lengths + +def test_compare_different_lengths(): msg = "Can only compare identically-labeled Series objects" + ser1 = pd.Series([1, 2, 3]) + ser2 = pd.Series([1, 2, 3, 4]) with pytest.raises(ValueError, match=msg): - ser1 = pd.Series([1, 2, 3]) - ser2 = pd.Series([1, 2, 3, 4]) ser1.compare(ser2) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4a012f34ddc3b..806a498b98853 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2254,12 +2254,12 @@ def test_dataframe_coerce(self, cache): expected = Series([Timestamp("20150204 00:00:00"), NaT]) tm.assert_series_equal(result, expected) - def test_dataframe_extra_keys_raisesm(self, df, cache): + def test_dataframe_extra_keys_raises(self, df, cache): # extra columns msg = r"extra keys have been passed to the datetime assemblage: \[foo\]" + df2 = df.copy() + df2["foo"] = 1 with pytest.raises(ValueError, match=msg): - df2 = df.copy() - df2["foo"] = 1 to_datetime(df2, cache=cache) @pytest.mark.parametrize( diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 104acc1d527cb..4f91e56a7d82b 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -157,13 +157,13 @@ def test_rolling_forward_window( indexer = FixedForwardWindowIndexer(window_size=3) match = "Forward-looking windows can't have center=True" + rolling = frame_or_series(values).rolling(window=indexer, center=True) with pytest.raises(ValueError, match=match): - rolling = frame_or_series(values).rolling(window=indexer, center=True) getattr(rolling, func)() match = "Forward-looking windows don't support setting the closed argument" + rolling = frame_or_series(values).rolling(window=indexer, closed="right") with pytest.raises(ValueError, match=match): - rolling = frame_or_series(values).rolling(window=indexer, closed="right") getattr(rolling, func)() rolling = frame_or_series(values).rolling(window=indexer, min_periods=2, step=step)