From 9ddaf90633eb654ce3618d96ea39720a49bf12ba Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jan 2024 18:20:37 -0800 Subject: [PATCH 1/2] Reuse more fixtures: --- pandas/tests/arrays/test_timedeltas.py | 9 ++++----- pandas/tests/computation/test_eval.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index a3f15467feb14..bcc52f197ee51 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -194,18 +194,17 @@ def test_add_timedeltaarraylike(self, tda): class TestTimedeltaArray: - @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) - def test_astype_int(self, dtype): + def test_astype_int(self, any_int_numpy_dtype): arr = TimedeltaArray._from_sequence( [Timedelta("1h"), Timedelta("2h")], dtype="m8[ns]" ) - if np.dtype(dtype) != np.int64: + if np.dtype(any_int_numpy_dtype) != np.int64: with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"): - arr.astype(dtype) + arr.astype(any_int_numpy_dtype) return - result = arr.astype(dtype) + result = arr.astype(any_int_numpy_dtype) expected = arr._ndarray.view("i8") tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 17630f14b08c7..ed3ea1b0bd0dc 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -606,11 +606,10 @@ def test_unary_in_array(self): ) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("expr", ["x < -0.1", "-5 > x"]) - def test_float_comparison_bin_op(self, dtype, expr): + def test_float_comparison_bin_op(self, float_numpy_dtype, expr): # GH 16363 - df = DataFrame({"x": np.array([0], dtype=dtype)}) + df = DataFrame({"x": np.array([0], dtype=float_numpy_dtype)}) res = df.eval(expr) assert res.values == np.array([False]) @@ -747,15 +746,16 @@ class TestTypeCasting: @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"]) # maybe someday... numexpr has too many upcasting rules now # chain(*(np.core.sctypes[x] for x in ['uint', 'int', 'float'])) - @pytest.mark.parametrize("dt", [np.float32, np.float64]) @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) - def test_binop_typecasting(self, engine, parser, op, dt, left_right): - df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dt) + def test_binop_typecasting(self, engine, parser, op, float_numpy_dtype, left_right): + df = DataFrame( + np.random.default_rng(2).standard_normal((5, 3)), dtype=float_numpy_dtype + ) left, right = left_right s = f"{left} {op} {right}" res = pd.eval(s, engine=engine, parser=parser) - assert df.values.dtype == dt - assert res.values.dtype == dt + assert df.values.dtype == float_numpy_dtype + assert res.values.dtype == float_numpy_dtype tm.assert_frame_equal(res, eval(s)) From 8c2cc1dabd23afbc571916313b291b92ab5585e3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Jan 2024 18:44:09 -0800 Subject: [PATCH 2/2] TST/CLN: Reuse more existing fixtures --- pandas/tests/frame/indexing/test_indexing.py | 45 +++++++++---------- pandas/tests/frame/indexing/test_setitem.py | 15 +++---- pandas/tests/frame/methods/test_astype.py | 18 ++++---- pandas/tests/frame/test_arithmetic.py | 24 +++++----- .../tests/groupby/transform/test_transform.py | 1 - 5 files changed, 45 insertions(+), 58 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 1b83c048411a8..a1868919be685 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1682,16 +1682,15 @@ def exp_single_cats_value(self): ) return exp_single_cats_value - @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_loc_iloc_setitem_list_of_lists(self, orig, indexer): + def test_loc_iloc_setitem_list_of_lists(self, orig, indexer_li): # - assign multiple rows (mixed values) -> exp_multi_row df = orig.copy() key = slice(2, 4) - if indexer is tm.loc: + if indexer_li is tm.loc: key = slice("j", "k") - indexer(df)[key, :] = [["b", 2], ["b", 2]] + indexer_li(df)[key, :] = [["b", 2], ["b", 2]] cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) @@ -1701,7 +1700,7 @@ def test_loc_iloc_setitem_list_of_lists(self, orig, indexer): df = orig.copy() with pytest.raises(TypeError, match=msg1): - indexer(df)[key, :] = [["c", 2], ["c", 2]] + indexer_li(df)[key, :] = [["c", 2], ["c", 2]] @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc, tm.at, tm.iat]) def test_loc_iloc_at_iat_setitem_single_value_in_categories( @@ -1722,32 +1721,30 @@ def test_loc_iloc_at_iat_setitem_single_value_in_categories( with pytest.raises(TypeError, match=msg1): indexer(df)[key] = "c" - @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) def test_loc_iloc_setitem_mask_single_value_in_categories( - self, orig, exp_single_cats_value, indexer + self, orig, exp_single_cats_value, indexer_li ): # mask with single True df = orig.copy() mask = df.index == "j" key = 0 - if indexer is tm.loc: + if indexer_li is tm.loc: key = df.columns[key] - indexer(df)[mask, key] = "b" + indexer_li(df)[mask, key] = "b" tm.assert_frame_equal(df, exp_single_cats_value) - @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_loc_iloc_setitem_full_row_non_categorical_rhs(self, orig, indexer): + def test_loc_iloc_setitem_full_row_non_categorical_rhs(self, orig, indexer_li): # - assign a complete row (mixed values) -> exp_single_row df = orig.copy() key = 2 - if indexer is tm.loc: + if indexer_li is tm.loc: key = df.index[2] # not categorical dtype, but "b" _is_ among the categories for df["cat"] - indexer(df)[key, :] = ["b", 2] + indexer_li(df)[key, :] = ["b", 2] cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) values1 = [1, 1, 2, 1, 1, 1, 1] @@ -1756,23 +1753,22 @@ def test_loc_iloc_setitem_full_row_non_categorical_rhs(self, orig, indexer): # "c" is not among the categories for df["cat"] with pytest.raises(TypeError, match=msg1): - indexer(df)[key, :] = ["c", 2] + indexer_li(df)[key, :] = ["c", 2] - @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) def test_loc_iloc_setitem_partial_col_categorical_rhs( - self, orig, exp_parts_cats_col, indexer + self, orig, exp_parts_cats_col, indexer_li ): # assign a part of a column with dtype == categorical -> # exp_parts_cats_col df = orig.copy() key = (slice(2, 4), 0) - if indexer is tm.loc: + if indexer_li is tm.loc: key = (slice("j", "k"), df.columns[0]) # same categories as we currently have in df["cats"] compat = Categorical(["b", "b"], categories=["a", "b"]) - indexer(df)[key] = compat + indexer_li(df)[key] = compat tm.assert_frame_equal(df, exp_parts_cats_col) # categories do not match df["cat"]'s, but "b" is among them @@ -1780,32 +1776,31 @@ def test_loc_iloc_setitem_partial_col_categorical_rhs( with pytest.raises(TypeError, match=msg2): # different categories but holdable values # -> not sure if this should fail or pass - indexer(df)[key] = semi_compat + indexer_li(df)[key] = semi_compat # categories do not match df["cat"]'s, and "c" is not among them incompat = Categorical(list("cc"), categories=list("abc")) with pytest.raises(TypeError, match=msg2): # different values - indexer(df)[key] = incompat + indexer_li(df)[key] = incompat - @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) def test_loc_iloc_setitem_non_categorical_rhs( - self, orig, exp_parts_cats_col, indexer + self, orig, exp_parts_cats_col, indexer_li ): # assign a part of a column with dtype != categorical -> exp_parts_cats_col df = orig.copy() key = (slice(2, 4), 0) - if indexer is tm.loc: + if indexer_li is tm.loc: key = (slice("j", "k"), df.columns[0]) # "b" is among the categories for df["cat"] - indexer(df)[key] = ["b", "b"] + indexer_li(df)[key] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) # "c" not part of the categories with pytest.raises(TypeError, match=msg1): - indexer(df)[key] = ["c", "c"] + indexer_li(df)[key] = ["c", "c"] @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc, tm.iloc]) def test_getitem_preserve_object_index_with_dates(self, indexer): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0e0f8cf61d3d7..3f13718cfc77a 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1000,13 +1000,12 @@ def test_setitem_slice_position(self): expected = DataFrame(arr) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) @pytest.mark.parametrize("n", [1, 2, 3]) - def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer): + def test_setitem_slice_indexer_broadcasting_rhs(self, n, box, indexer_si): # GH#40440 df = DataFrame([[1, 3, 5]] + [[2, 4, 6]] * n, columns=["a", "b", "c"]) - indexer(df)[1:] = box([10, 11, 12]) + indexer_si(df)[1:] = box([10, 11, 12]) expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) tm.assert_frame_equal(df, expected) @@ -1019,15 +1018,14 @@ def test_setitem_list_indexer_broadcasting_rhs(self, n, box): expected = DataFrame([[1, 3, 5]] + [[10, 11, 12]] * n, columns=["a", "b", "c"]) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("indexer", [tm.setitem, tm.iloc]) @pytest.mark.parametrize("box", [Series, np.array, list, pd.array]) @pytest.mark.parametrize("n", [1, 2, 3]) - def test_setitem_slice_broadcasting_rhs_mixed_dtypes(self, n, box, indexer): + def test_setitem_slice_broadcasting_rhs_mixed_dtypes(self, n, box, indexer_si): # GH#40440 df = DataFrame( [[1, 3, 5], ["x", "y", "z"]] + [[2, 4, 6]] * n, columns=["a", "b", "c"] ) - indexer(df)[1:] = box([10, 11, 12]) + indexer_si(df)[1:] = box([10, 11, 12]) expected = DataFrame( [[1, 3, 5]] + [[10, 11, 12]] * (n + 1), columns=["a", "b", "c"], @@ -1105,13 +1103,12 @@ def test_setitem_loc_only_false_indexer_dtype_changed(self, box): df.loc[indexer, ["b"]] = 9 tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc]) - def test_setitem_boolean_mask_aligning(self, indexer): + def test_setitem_boolean_mask_aligning(self, indexer_sl): # GH#39931 df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]}) expected = df.copy() mask = df["a"] >= 3 - indexer(df)[mask] = indexer(df)[mask].sort_values("a") + indexer_sl(df)[mask] = indexer_sl(df)[mask].sort_values("a") tm.assert_frame_equal(df, expected) def test_setitem_mask_categorical(self): diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index b73c759518b0e..eab8dbd2787f7 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -134,9 +134,8 @@ def test_astype_with_view_mixed_float(self, mixed_float_frame): tf.astype(np.int64) tf.astype(np.float32) - @pytest.mark.parametrize("dtype", [np.int32, np.int64]) @pytest.mark.parametrize("val", [np.nan, np.inf]) - def test_astype_cast_nan_inf_int(self, val, dtype): + def test_astype_cast_nan_inf_int(self, val, any_int_numpy_dtype): # see GH#14265 # # Check NaN and inf --> raise error when converting to int. @@ -144,7 +143,7 @@ def test_astype_cast_nan_inf_int(self, val, dtype): df = DataFrame([val]) with pytest.raises(ValueError, match=msg): - df.astype(dtype) + df.astype(any_int_numpy_dtype) def test_astype_str(self): # see GH#9757 @@ -323,9 +322,9 @@ def test_astype_categoricaldtype_class_raises(self, cls): with pytest.raises(TypeError, match=xpr): df["A"].astype(cls) - @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) - def test_astype_extension_dtypes(self, dtype): + def test_astype_extension_dtypes(self, any_int_ea_dtype): # GH#22578 + dtype = any_int_ea_dtype df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) expected1 = DataFrame( @@ -348,9 +347,9 @@ def test_astype_extension_dtypes(self, dtype): tm.assert_frame_equal(df.astype(dtype), expected1) tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) - @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) - def test_astype_extension_dtypes_1d(self, dtype): + def test_astype_extension_dtypes_1d(self, any_int_ea_dtype): # GH#22578 + dtype = any_int_ea_dtype df = DataFrame({"a": [1.0, 2.0, 3.0]}) expected1 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) @@ -433,14 +432,13 @@ def test_astype_from_datetimelike_to_object(self, dtype, unit): else: assert result.iloc[0, 0] == Timedelta(1, unit=unit) - @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) - def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): + def test_astype_to_datetimelike_unit(self, any_real_numpy_dtype, dtype, unit): # tests all units from numeric origination # GH#19223 / GH#12425 dtype = f"{dtype}[{unit}]" - arr = np.array([[1, 2, 3]], dtype=arr_dtype) + arr = np.array([[1, 2, 3]], dtype=any_real_numpy_dtype) df = DataFrame(arr) result = df.astype(dtype) expected = DataFrame(arr.astype(dtype)) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index be6ed91973e80..d33a7cdcf21c3 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -304,8 +304,7 @@ def test_df_string_comparison(self): class TestFrameFlexComparisons: # TODO: test_bool_flex_frame needs a better name - @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"]) - def test_bool_flex_frame(self, op): + def test_bool_flex_frame(self, comparison_op): data = np.random.default_rng(2).standard_normal((5, 3)) other_data = np.random.default_rng(2).standard_normal((5, 3)) df = DataFrame(data) @@ -315,8 +314,8 @@ def test_bool_flex_frame(self, op): # DataFrame assert df.eq(df).values.all() assert not df.ne(df).values.any() - f = getattr(df, op) - o = getattr(operator, op) + f = getattr(df, comparison_op.__name__) + o = comparison_op # No NAs tm.assert_frame_equal(f(other), o(df, other)) # Unaligned @@ -459,25 +458,23 @@ def test_flex_comparison_nat(self): result = df.ne(pd.NaT) assert result.iloc[0, 0].item() is True - @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) - def test_df_flex_cmp_constant_return_types(self, opname): + def test_df_flex_cmp_constant_return_types(self, comparison_op): # GH 15077, non-empty DataFrame df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) const = 2 - result = getattr(df, opname)(const).dtypes.value_counts() + result = getattr(df, comparison_op.__name__)(const).dtypes.value_counts() tm.assert_series_equal( result, Series([2], index=[np.dtype(bool)], name="count") ) - @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) - def test_df_flex_cmp_constant_return_types_empty(self, opname): + def test_df_flex_cmp_constant_return_types_empty(self, comparison_op): # GH 15077 empty DataFrame df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) const = 2 empty = df.iloc[:0] - result = getattr(empty, opname)(const).dtypes.value_counts() + result = getattr(empty, comparison_op.__name__)(const).dtypes.value_counts() tm.assert_series_equal( result, Series([2], index=[np.dtype(bool)], name="count") ) @@ -664,11 +661,12 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(df.div(row), df / row) tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - @pytest.mark.parametrize("dtype", ["int64", "float64"]) - def test_arith_flex_series_broadcasting(self, dtype): + def test_arith_flex_series_broadcasting(self, any_real_numpy_dtype): # broadcasting issue in GH 7325 - df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=dtype) + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=any_real_numpy_dtype) expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + if any_real_numpy_dtype == "float32": + expected = expected.astype(any_real_numpy_dtype) result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index f7a4233b3ddc9..134a585651d72 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -706,7 +706,6 @@ def test_cython_transform_series(op, args, targop): @pytest.mark.parametrize("op", ["cumprod", "cumsum"]) -@pytest.mark.parametrize("skipna", [False, True]) @pytest.mark.parametrize( "input, exp", [